Ejemplo n.º 1
0
def api_for_instance(instance_name):
    """Return an Arvados API client for the named configured instance.

    Falls back to an environment-based client when instance_name is
    empty.  A name containing '/' is used directly as a config file
    path; otherwise ~/.config/arvados/<name>.conf is loaded.
    """
    if not instance_name:
        # No name given: build the client from ARVADOS_* environment vars.
        return arvados.api('v1', model=OrderedJsonModel())

    if '/' in instance_name:
        config_file = instance_name
    else:
        config_file = os.path.join(os.environ['HOME'], '.config', 'arvados',
                                   "{}.conf".format(instance_name))

    try:
        cfg = arvados.config.load(config_file)
    except (IOError, OSError) as err:
        abort(("Could not open config file {}: {}\n"
               "You must make sure that your configuration tokens\n"
               "for Arvados instance {} are in {} and that this\n"
               "file is readable.").format(config_file, err,
                                           instance_name, config_file))

    if 'ARVADOS_API_HOST' in cfg and 'ARVADOS_API_TOKEN' in cfg:
        insecure_flag = cfg.get('ARVADOS_API_HOST_INSECURE', '').lower()
        api_is_insecure = insecure_flag in {'1', 't', 'true', 'y', 'yes'}
        client = arvados.api('v1',
                             host=cfg['ARVADOS_API_HOST'],
                             token=cfg['ARVADOS_API_TOKEN'],
                             insecure=api_is_insecure,
                             model=OrderedJsonModel())
    else:
        abort('need ARVADOS_API_HOST and ARVADOS_API_TOKEN for {}'.format(instance_name))
    return client
Ejemplo n.º 2
0
def NewSummarizer(process_or_uuid, **kwargs):
    """Construct with the appropriate subclass for this uuid/object.

    process_or_uuid: either a process record (dict with a 'uuid' key) or
    a bare UUID string.  Remaining kwargs are passed through to the
    chosen summarizer class.  Raises ArgumentError for UUIDs of
    unsupported object types.
    """
    if isinstance(process_or_uuid, dict):
        process = process_or_uuid
        uuid = process['uuid']
    else:
        uuid = process_or_uuid
        process = None
        # An API client is only needed when the record must be fetched.
        arv = arvados.api('v1', model=OrderedJsonModel())

    if '-dz642-' in uuid:
        if process is None:
            process = arv.containers().get(uuid=uuid).execute()
        klass = ContainerTreeSummarizer
    elif '-xvhdp-' in uuid:
        if process is None:
            process = arv.container_requests().get(uuid=uuid).execute()
        klass = ContainerTreeSummarizer
    elif '-8i9sb-' in uuid:
        if process is None:
            process = arv.jobs().get(uuid=uuid).execute()
        klass = JobTreeSummarizer
    elif '-d1hrv-' in uuid:
        if process is None:
            process = arv.pipeline_instances().get(uuid=uuid).execute()
        klass = PipelineSummarizer
    elif '-4zz18-' in uuid:
        return CollectionSummarizer(collection_id=uuid)
    else:
        # Interpolate the uuid into the message here: the original passed
        # it as a second constructor argument (logging style), which left
        # the '%s' placeholder unformatted in the exception text.
        raise ArgumentError("Unrecognized uuid %s" % uuid)
    return klass(process, uuid=uuid, **kwargs)
Ejemplo n.º 3
0
def main(args, stdout, stderr, api_client=None):
    """Parse command-line args, build an ArvCwlRunner, and delegate to cwltool."""
    arvargs = arg_parser().parse_args(args)

    job_order_object = None
    if arvargs.create_template and not arvargs.job_order:
        job_order_object = ({}, "")

    try:
        if api_client is None:
            # Build a client from the environment when none was supplied.
            api_client = arvados.api('v1', model=OrderedJsonModel())
        runner = ArvCwlRunner(api_client, work_api=arvargs.work_api)
    except Exception as e:
        logger.error(e)
        return 1

    arvargs.conformance_test = None
    arvargs.use_container = True

    return cwltool.main.main(
        args=arvargs,
        stdout=stdout,
        stderr=stderr,
        executor=runner.arvExecutor,
        makeTool=runner.arvMakeTool,
        versionfunc=versionstring,
        job_order_object=job_order_object,
        make_fs_access=partial(CollectionFsAccess, api_client=api_client))
Ejemplo n.º 4
0
 def test_ordered_json_model(self):
     """Keys in an OrderedJsonModel response keep their server-side order."""
     hex_map = collections.OrderedDict(
         (c, int(c, 16)) for c in string.hexdigits)
     req_builder = apiclient_http.RequestMockBuilder({
         'arvados.humans.get': (None, json.dumps(hex_map)),
     })
     api = arvados.api('v1',
                       requestBuilder=req_builder,
                       model=OrderedJsonModel())
     result = api.humans().get(uuid='test').execute()
     self.assertEqual(string.hexdigits, ''.join(result.keys()))
Ejemplo n.º 5
0
    def __init__(self, root, skip_child_jobs=False, **kwargs):
        """Summarize a container tree rooted at the given record.

        root: a container or container-request record (dict) for the
            root of the tree.
        skip_child_jobs: when True, do not descend into child container
            requests (a warning is logged instead).
        Remaining kwargs are forwarded to each ContainerSummarizer and
        to the superclass constructor.
        """
        arv = arvados.api('v1', model=OrderedJsonModel())

        # Prefer an explicit label, then the record's name, then its UUID.
        label = kwargs.pop('label', None) or root.get('name') or root['uuid']
        root['name'] = label

        children = collections.OrderedDict()
        # Breadth-first walk over the request/container tree.
        todo = collections.deque((root, ))
        while len(todo) > 0:
            current = todo.popleft()
            label = current['name']
            sort_key = current['created_at']
            # A container-request UUID ('-xvhdp-') is resolved to its
            # underlying container record before summarizing.
            if current['uuid'].find('-xvhdp-') > 0:
                current = arv.containers().get(
                    uuid=current['container_uuid']).execute()

            summer = ContainerSummarizer(current, label=label, **kwargs)
            summer.sort_key = sort_key
            children[current['uuid']] = summer

            # Page through container requests issued by the current
            # container, using uuid ordering for stable pagination.
            page_filters = []
            while True:
                child_crs = arv.container_requests().index(
                    order=['uuid asc'],
                    filters=page_filters +
                    [['requesting_container_uuid', '=', current['uuid']]],
                ).execute()
                if not child_crs['items']:
                    break
                elif skip_child_jobs:
                    logger.warning(
                        '%s: omitting stats from %d child containers'
                        ' because --skip-child-jobs flag is on', label,
                        child_crs['items_available'])
                    break
                page_filters = [['uuid', '>', child_crs['items'][-1]['uuid']]]
                for cr in child_crs['items']:
                    # Only requests that actually got a container are queued.
                    if cr['container_uuid']:
                        logger.debug('%s: container req %s', current['uuid'],
                                     cr['uuid'])
                        cr['name'] = cr.get('name') or cr['uuid']
                        todo.append(cr)
        # Re-emit the children ordered by their creation time.
        sorted_children = collections.OrderedDict()
        for uuid in sorted(list(children.keys()),
                           key=lambda uuid: children[uuid].sort_key):
            sorted_children[uuid] = children[uuid]
        super(ContainerTreeSummarizer, self).__init__(children=sorted_children,
                                                      label=root['name'],
                                                      **kwargs)
Ejemplo n.º 6
0
 def __init__(self, pipeline_instance_uuid, **kwargs):
     """Build a JobSummarizer for each job-bearing pipeline component.

     pipeline_instance_uuid: UUID of the pipeline instance to fetch.
     Remaining kwargs are forwarded to each JobSummarizer.  Components
     with no job assigned are skipped with a warning.
     """
     arv = arvados.api('v1', model=OrderedJsonModel())
     instance = arv.pipeline_instances().get(
         uuid=pipeline_instance_uuid).execute()
     self.summarizers = collections.OrderedDict()
     # items() instead of the Python-2-only iteritems(): iteration
     # behavior is identical, and this keeps the code working on Python 3.
     for cname, component in instance['components'].items():
         if 'job' not in component:
             logger.warning("%s: skipping component with no job assigned",
                            cname)
         else:
             logger.info("%s: job %s", cname, component['job']['uuid'])
             summarizer = JobSummarizer(component['job'], **kwargs)
             summarizer.label = '{} {}'.format(cname,
                                               component['job']['uuid'])
             self.summarizers[cname] = summarizer
     self.label = pipeline_instance_uuid
Ejemplo n.º 7
0
 def test_ordered_json_model(self):
     """A response served through OrderedJsonModel preserves key order."""
     ordered_payload = json.dumps(collections.OrderedDict(
         (c, int(c, 16)) for c in string.hexdigits))
     req_builder = apiclient_http.RequestMockBuilder({
         'arvados.humans.get': (None, ordered_payload),
     })
     api = arvados.api('v1',
                       host=os.environ['ARVADOS_API_HOST'],
                       token='discovery-doc-only-no-token-needed',
                       insecure=True,
                       requestBuilder=req_builder,
                       model=OrderedJsonModel())
     result = api.humans().get(uuid='test').execute()
     self.assertEqual(string.hexdigits, ''.join(result.keys()))
Ejemplo n.º 8
0
    def __init__(self, job, label=None, **kwargs):
        """Summarize a job together with any jobs listed in its components.

        job: a job record (dict).
        label: display label; defaults to the job's name or UUID.
        Remaining kwargs are forwarded to each child summarizer and to
        the superclass constructor.
        """
        arv = arvados.api('v1', model=OrderedJsonModel())
        label = label or job.get('name', job['uuid'])
        children = collections.OrderedDict()
        children[job['uuid']] = JobSummarizer(job, label=label, **kwargs)
        if job.get('components', None):
            # Fetch all component jobs with one index call to avoid a
            # per-component API round trip.
            preloaded = {}
            for j in arv.jobs().index(
                    limit=len(job['components']),
                    filters=[['uuid','in',job['components'].values()]]).execute()['items']:
                preloaded[j['uuid']] = j
            for cname in sorted(job['components'].keys()):
                child_uuid = job['components'][cname]
                # Fall back to a direct get if the index page missed one.
                j = (preloaded.get(child_uuid) or
                     arv.jobs().get(uuid=child_uuid).execute())
                children[child_uuid] = JobTreeSummarizer(job=j, label=cname, **kwargs)

        super(JobTreeSummarizer, self).__init__(
            children=children,
            label=label,
            **kwargs)
Ejemplo n.º 9
0
def main(args, stdout, stderr, api_client=None, keep_client=None):
    """Parse arguments, configure an ArvCwlRunner, and run cwltool's main."""
    arvargs = arg_parser().parse_args(args)

    job_order_object = None
    wants_definition = (arvargs.create_template or arvargs.create_workflow
                        or arvargs.update_workflow)
    if wants_definition and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    try:
        if api_client is None:
            api_client = arvados.api('v1', model=OrderedJsonModel())
        runner = ArvCwlRunner(api_client, work_api=arvargs.work_api,
                              keep_client=keep_client,
                              output_name=arvargs.output_name)
    except Exception as e:
        logger.error(e)
        return 1

    # Adjust logging verbosity according to the command-line flags.
    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
    if arvargs.quiet:
        logger.setLevel(logging.WARN)
        logging.getLogger('arvados.arv-run').setLevel(logging.WARN)
    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    arvargs.conformance_test = None
    arvargs.use_container = True

    return cwltool.main.main(
        args=arvargs,
        stdout=stdout,
        stderr=stderr,
        executor=runner.arv_executor,
        makeTool=runner.arv_make_tool,
        versionfunc=versionstring,
        job_order_object=job_order_object,
        make_fs_access=partial(CollectionFsAccess, api_client=api_client))
Ejemplo n.º 10
0
def main(stdin, stdout, stderr, arglist):
    """Push local git refs and the pipeline template to a remote cluster.

    Reads cluster settings from the repository's pipeline config, pushes
    the requested refspecs, then updates the remote pipeline template
    record.  Exits with status 1 if the cluster's Arvados config file is
    missing.
    """
    args = parse_arguments(arglist)
    # Interactive prompts read from stdin only when --interactive is set.
    prompter = Prompter(stdin if args.interactive else None, stdout, stderr)
    git_conf_dir = get_git_dir('--git-dir')
    git_work_dir = get_git_dir('--show-toplevel')
    pipeline_config = load_pipeline_config(git_conf_dir, args.dest)
    cluster = Cluster(args.dest, pipeline_config.items(args.dest))
    if not os.path.exists(cluster.arv_conf_path):
        print("Error: No Arvados configuration at {}.".format(
            cluster.arv_conf_path),
              file=stderr)
        sys.exit(1)
    arvados.config.initialize(cluster.arv_conf_path)

    # Missing settings are asked for interactively via the prompter.
    rel_template_path = pipeline_config.get_or_ask('DEFAULT', 'template_path',
                                                   ask_template_path, prompter,
                                                   stdout, git_work_dir)
    if cluster.template_uuid is None:
        cluster.template_uuid = prompter.ask(
            "Pipeline template UUID on " + args.dest,
            "No pipeline template UUID known on " + args.dest)
    setup_git(cluster, prompter)

    # Load the template as an OrderedDict so key order survives the
    # round trip to the API server.
    with open(os.path.join(git_work_dir, rel_template_path)) as pt_file:
        pipeline_template = json.load(
            pt_file, object_pairs_hook=collections.OrderedDict)
    subprocess.check_call(['git', 'push', cluster.git_push_url] +
                          args.refspecs)
    arv = api_from_config('v1', model=OrderedJsonModel())
    arv.pipeline_templates().update(uuid=cluster.template_uuid,
                                    body=pipeline_template).execute()

    # Persist any settings discovered during the push (e.g. a new
    # template UUID) back into the pipeline config file.
    config_dirty = False
    for key, value in cluster.config_changes():
        pipeline_config.set(args.dest, key, value)
        config_dirty = True
    if config_dirty:
        pipeline_config.save()
Ejemplo n.º 11
0
def main(args, stdout, stderr, api_client=None):
    """Parse args, set up an ArvCwlRunner, and delegate to cwltool.main."""
    arvargs = arg_parser().parse_args(args)

    job_order_object = None
    if arvargs.create_template and not arvargs.job_order:
        job_order_object = ({}, "")

    try:
        if api_client is None:
            # Default to a client built from the environment.
            api_client = arvados.api('v1', model=OrderedJsonModel())
        runner = ArvCwlRunner(api_client)
    except Exception as e:
        logger.error(e)
        return 1

    return cwltool.main.main(
        args=arvargs,
        stdout=stdout,
        stderr=stderr,
        executor=runner.arvExecutor,
        makeTool=runner.arvMakeTool,
        versionfunc=versionstring,
        job_order_object=job_order_object)
Ejemplo n.º 12
0
def NewSummarizer(process_or_uuid, **kwargs):
    """Construct with the appropriate subclass for this uuid/object.

    process_or_uuid: either a process record (dict with a 'uuid' key) or
    a bare UUID string.  Remaining kwargs are passed through to the
    chosen summarizer class.  Raises ArgumentError for UUIDs of
    unsupported object types.
    """
    if isinstance(process_or_uuid, dict):
        process = process_or_uuid
        uuid = process['uuid']
    else:
        uuid = process_or_uuid
        process = None
        # An API client is only needed when the record must be fetched.
        arv = arvados.api('v1', model=OrderedJsonModel())

    if '-dz642-' in uuid:
        if process is None:
            # Get the associated CR. Doesn't matter which since they all have the same logs
            crs = arv.container_requests().list(
                filters=[['container_uuid', '=',
                          uuid]], limit=1).execute()['items']
            if len(crs) > 0:
                process = crs[0]
        klass = ContainerRequestTreeSummarizer
    elif '-xvhdp-' in uuid:
        if process is None:
            process = arv.container_requests().get(uuid=uuid).execute()
        klass = ContainerRequestTreeSummarizer
    elif '-8i9sb-' in uuid:
        if process is None:
            process = arv.jobs().get(uuid=uuid).execute()
        klass = JobTreeSummarizer
    elif '-d1hrv-' in uuid:
        if process is None:
            process = arv.pipeline_instances().get(uuid=uuid).execute()
        klass = PipelineSummarizer
    elif '-4zz18-' in uuid:
        return CollectionSummarizer(collection_id=uuid)
    else:
        # Interpolate the uuid into the message here: the original passed
        # it as a second constructor argument (logging style), which left
        # the '%s' placeholder unformatted in the exception text.
        raise ArgumentError("Unrecognized uuid %s" % uuid)
    return klass(process, uuid=uuid, **kwargs)
Ejemplo n.º 13
0
def main(args, stdout, stderr, api_client=None):
    """Run a CWL workflow on Arvados via cwltool with extra CLI options.

    args: argv-style list of arguments; "--leave-outputs" is always
        forced on by prepending it.
    api_client: optional pre-built Arvados API client; built from the
        environment when None.
    Returns cwltool's exit status, or 1 if runner setup fails.
    """
    args.insert(0, "--leave-outputs")
    parser = cwltool.main.arg_parser()

    # --enable-reuse / --disable-reuse toggle one flag (default True).
    exgroup = parser.add_mutually_exclusive_group()
    exgroup.add_argument("--enable-reuse",
                         action="store_true",
                         default=True,
                         dest="enable_reuse",
                         help="")
    exgroup.add_argument("--disable-reuse",
                         action="store_false",
                         default=True,
                         dest="enable_reuse",
                         help="")

    parser.add_argument("--project-uuid",
                        type=str,
                        help="Project that will own the workflow jobs")
    parser.add_argument(
        "--ignore-docker-for-reuse",
        action="store_true",
        help=
        "Ignore Docker image version when deciding whether to reuse past jobs.",
        default=False)

    # --submit / --local choose where the workflow driver runs
    # (default: submit to Arvados).
    exgroup = parser.add_mutually_exclusive_group()
    exgroup.add_argument("--submit",
                         action="store_true",
                         help="Submit workflow to run on Arvados.",
                         default=True,
                         dest="submit")
    exgroup.add_argument(
        "--local",
        action="store_false",
        help="Run workflow on local host (submits jobs to Arvados).",
        default=True,
        dest="submit")

    # --wait / --no-wait control whether we block on the runner job.
    exgroup = parser.add_mutually_exclusive_group()
    exgroup.add_argument(
        "--wait",
        action="store_true",
        help="After submitting workflow runner job, wait for completion.",
        default=True,
        dest="wait")
    exgroup.add_argument("--no-wait",
                         action="store_false",
                         help="Submit workflow runner job and exit.",
                         default=True,
                         dest="wait")

    try:
        if api_client is None:
            api_client = arvados.api('v1', model=OrderedJsonModel())
        runner = ArvCwlRunner(api_client)
    except Exception as e:
        logger.error(e)
        return 1

    return cwltool.main.main(args,
                             stdout=stdout,
                             stderr=stderr,
                             executor=runner.arvExecutor,
                             makeTool=runner.arvMakeTool,
                             parser=parser,
                             versionfunc=versionstring)
Ejemplo n.º 14
0
def main(args, stdout, stderr, api_client=None, keep_client=None):
    """Entry point for the Arvados CWL runner.

    Parses arguments, constructs API/Keep clients and an ArvCwlRunner,
    configures logging, and delegates execution to cwltool.main.main().
    Returns cwltool's exit status, or 1 on setup/validation failure.
    """
    parser = arg_parser()

    job_order_object = None
    arvargs = parser.parse_args(args)

    if arvargs.version:
        # print() call form (not the Python-2-only print statement) keeps
        # this module importable on both Python 2 and Python 3.
        print(versionstring())
        return

    if arvargs.update_workflow:
        # Infer the API family from the workflow UUID's type infix and
        # reject a conflicting explicit --api choice.
        if arvargs.update_workflow.find('-7fd4e-') == 5:
            want_api = 'containers'
        elif arvargs.update_workflow.find('-p5p6p-') == 5:
            want_api = 'jobs'
        else:
            want_api = None
        if want_api and arvargs.work_api and want_api != arvargs.work_api:
            logger.error(
                '--update-workflow arg {!r} uses {!r} API, but --api={!r} specified'
                .format(arvargs.update_workflow, want_api, arvargs.work_api))
            return 1
        arvargs.work_api = want_api

    if (arvargs.create_workflow
            or arvargs.update_workflow) and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    try:
        if api_client is None:
            api_client = arvados.api('v1', model=OrderedJsonModel())
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client,
                                                  num_retries=4)
        runner = ArvCwlRunner(api_client,
                              work_api=arvargs.work_api,
                              keep_client=keep_client,
                              num_retries=4,
                              output_name=arvargs.output_name,
                              output_tags=arvargs.output_tags)
    except Exception as e:
        logger.error(e)
        return 1

    # Adjust logging verbosity according to the command-line flags.
    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('arvados').setLevel(logging.DEBUG)

    if arvargs.quiet:
        logger.setLevel(logging.WARN)
        logging.getLogger('arvados').setLevel(logging.WARN)
        logging.getLogger('arvados.arv-run').setLevel(logging.WARN)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    if arvargs.log_timestamps:
        arvados.log_handler.setFormatter(
            logging.Formatter(
                '%(asctime)s %(name)s %(levelname)s: %(message)s',
                '%Y-%m-%d %H:%M:%S'))
    else:
        arvados.log_handler.setFormatter(
            logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    arvargs.conformance_test = None
    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.validate = None

    return cwltool.main.main(args=arvargs,
                             stdout=stdout,
                             stderr=stderr,
                             executor=runner.arv_executor,
                             makeTool=runner.arv_make_tool,
                             versionfunc=versionstring,
                             job_order_object=job_order_object,
                             make_fs_access=partial(CollectionFsAccess,
                                                    api_client=api_client,
                                                    keep_client=keep_client),
                             fetcher_constructor=partial(
                                 CollectionFetcher,
                                 api_client=api_client,
                                 keep_client=keep_client,
                                 num_retries=runner.num_retries),
                             resolver=partial(collectionResolver,
                                              api_client,
                                              num_retries=runner.num_retries),
                             logger_handler=arvados.log_handler)
    # NOTE: the original had unreachable code after this return that
    # referenced an unrelated function's locals ('args.destination',
    # 'args.uuids'); it was dead code and has been removed.


def setup_logging(args):
    """Attach a timestamped stderr handler; each -v raises verbosity."""
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S'))
    logger.addHandler(handler)
    # Every -v lowers the threshold by 10, but never below level 1.
    logger.setLevel(max(1, logging.WARNING - (10 * args.verbose)))


def main(stdin, stdout, stderr, arglist, arv):
    """Move the named objects and their dependencies to a destination project."""
    args = parse_arguments(arglist)
    setup_logging(args)
    mapper = UUIDMapper(arv)
    tracker = DependencyTracker(mapper, logger)
    for target_uuid in args.uuids:
        tracker.add_object(target_uuid, arv)
    tracker.move_to(args.destination, arv, args.request_handler)


if __name__ == '__main__':
    # Script entry point: wire up the real stdio streams and an API
    # client built from the environment (OrderedJsonModel keeps the
    # server's response key order).
    main(sys.stdin, sys.stdout, sys.stderr, sys.argv[1:],
         arvados.api('v1', model=OrderedJsonModel()))
Ejemplo n.º 16
0
def main(args, stdout, stderr, api_client=None, keep_client=None,
         install_sig_handlers=True):
    """Entry point for the Arvados CWL executor.

    args: argv-style argument list.
    api_client/keep_client: optional pre-built clients; created from the
        environment when None.
    install_sig_handlers: install arv_cmd signal handlers unless False
        (e.g. when embedded in another program or under test).
    Returns cwltool's exit status, or 1 on setup/validation failure.
    """
    parser = arg_parser()

    job_order_object = None
    arvargs = parser.parse_args(args)

    # Only a single storage class is supported at this point.
    if len(arvargs.storage_classes.strip().split(',')) > 1:
        logger.error(str(u"Multiple storage classes are not supported currently."))
        return 1

    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.print_supported_versions = False

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    if arvargs.update_workflow:
        # Infer the API family from the workflow UUID's type infix and
        # reject a conflicting explicit --api choice.
        if arvargs.update_workflow.find('-7fd4e-') == 5:
            want_api = 'containers'
        else:
            want_api = None
        if want_api and arvargs.work_api and want_api != arvargs.work_api:
            logger.error(str(u'--update-workflow arg {!r} uses {!r} API, but --api={!r} specified').format(
                arvargs.update_workflow, want_api, arvargs.work_api))
            return 1
        arvargs.work_api = want_api

    if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    # Backfill any cwltool default options this parser doesn't define.
    for key, val in viewitems(cwltool.argparser.get_default_args()):
        if not hasattr(arvargs, key):
            setattr(arvargs, key, val)

    try:
        if api_client is None:
            api_client = arvados.safeapi.ThreadSafeApiCache(
                api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout},
                keep_params={"num_retries": 4})
            keep_client = api_client.keep
            # Make an API object now so errors are reported early.
            api_client.users().current().execute()
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
        executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
    except Exception:
        logger.exception("Error creating the Arvados CWL Executor")
        return 1

    # Note that unless in debug mode, some stack traces related to user
    # workflow errors may be suppressed.
    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('arvados').setLevel(logging.DEBUG)

    if arvargs.quiet:
        logger.setLevel(logging.WARN)
        logging.getLogger('arvados').setLevel(logging.WARN)
        logging.getLogger('arvados.arv-run').setLevel(logging.WARN)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    if arvargs.log_timestamps:
        arvados.log_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S'))
    else:
        arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    if stdout is sys.stdout:
        # cwltool.main has code to work around encoding issues with
        # sys.stdout and unix pipes (they default to ASCII encoding,
        # we want utf-8), so when stdout is sys.stdout set it to None
        # to take advantage of that.  Don't override it for all cases
        # since we still want to be able to capture stdout for the
        # unit tests.
        stdout = None

    return cwltool.main.main(args=arvargs,
                             stdout=stdout,
                             stderr=stderr,
                             executor=executor.arv_executor,
                             versionfunc=versionstring,
                             job_order_object=job_order_object,
                             logger_handler=arvados.log_handler,
                             custom_schema_callback=add_arv_hints,
                             loadingContext=executor.loadingContext,
                             runtimeContext=executor.runtimeContext,
                             input_required=not (arvargs.create_workflow or arvargs.update_workflow))
Ejemplo n.º 17
0
def run():
    """Run a CWL tool inside an Arvados crunch job.

    Reads the job order from the current job's script_parameters,
    rewrites Keep locators into keep: references, pulls arv:-prefixed
    control options out of the job order, then executes the tool with
    an ArvCwlRunner.  On failure, marks the current job task as
    unsuccessful via the API.
    """
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        # The tool itself is mounted from Keep; 'cwl:tool' names its path.
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        # Matches a portable data hash, optionally followed by a path.
        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Turn a bare PDH(/path) into a keep: URI; leave others as-is.
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # Rewrite the 'location' of a File/Directory object in place.
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Promote bare Keep locator strings to CWL File objects.
        for k,v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        # arv:-prefixed keys are runner options, not tool inputs; pop
        # each one out of the job order before execution.
        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        debug = False

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        if "arv:debug" in job_order_object:
            debug = job_order_object["arv:debug"]
            del job_order_object["arv:debug"]

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.safeapi.ThreadSafeApiCache(
            api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4}),
                                          output_name=output_name, output_tags=output_tags)

        make_fs_access = functools.partial(CollectionFsAccess,
                                 collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.arv_make_tool,
                      fetcher_constructor=functools.partial(CollectionFetcher,
                                                  api_client=runner.api,
                                                  fs_access=make_fs_access(""),
                                                  num_retries=runner.num_retries))

        if debug:
            logger.setLevel(logging.DEBUG)
            logging.getLogger('arvados').setLevel(logging.DEBUG)
            logging.getLogger("cwltool").setLevel(logging.DEBUG)

        # Build the executor arguments; the workflow runs in-process
        # (submit=False) inside this crunch job.
        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = debug
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access
        args.trash_intermediate = False
        args.intermediate_output_ttl = 0
        args.priority = arvados_cwl.DEFAULT_PRIORITY
        args.do_validate = True
        args.disable_js_validation = False

        runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        # Record failure on the current task, attaching any partial
        # output collection that was produced.
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                             body={
                                                 'output': outputCollection,
                                                 'success': False,
                                                 'progress':1.0
                                             }).execute()
Ejemplo n.º 18
0
def run():
    """Execute a CWL workflow from inside an Arvados "jobs" API crunch job.

    Reads the workflow reference ("cwl:tool") and its inputs from the
    current job's script_parameters, rewrites bare Keep locators into
    CWL File objects, runs the workflow with ArvCwlRunner, and on any
    failure records partial output and marks the current task as failed.
    """
    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        # Workflow inputs (plus the "cwl:tool" reference and optional
        # "arv:*" runner settings) arrive as the job's script_parameters.
        job_order_object = arvados.current_job()['script_parameters']

        # Matches a bare Keep portable data hash with an optional path,
        # e.g. "d41d8cd98f00b204e9800998ecf8427e+0/dir/file".
        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix bare PDH paths with the keep: scheme; leave other
            # values untouched.
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # Rewrite a CWL File/Directory object's location in place.
            # NOTE(review): assumes "location" is always present on the
            # object -- confirm against adjustFileObjs/adjustDirObjs.
            v["location"] = keeppath(v["location"])

        # The tool document is available read-only under the task's Keep
        # mount point.
        job_order_object["cwl:tool"] = "file://%s/%s" % (
            os.environ['TASK_KEEPMOUNT'], job_order_object["cwl:tool"])

        # Promote any bare Keep-locator string parameter to a CWL File
        # object. (basestring: this script targets Python 2.)
        for k, v in job_order_object.items():
            if isinstance(
                    v,
                    basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        # Normalize all File/Directory locations to keep: URIs, then
        # populate directory listings from Keep.
        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)
        adjustDirObjs(
            job_order_object,
            functools.partial(
                getListing,
                arvados_cwl.fsaccess.CollectionFsAccess("", api_client=api)))

        # Optional runner-level settings travel in the job order under
        # "arv:" keys; consume and strip them before tool execution.
        output_name = None
        output_tags = None
        enable_reuse = True
        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        # OrderedJsonModel preserves key order in API responses.
        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api(
            'v1', model=OrderedJsonModel()),
                                          output_name=output_name,
                                          output_tags=output_tags)

        t = load_tool(job_order_object, runner.arv_make_tool)

        # Build the namespace the executor expects; submit=False because
        # this code is already running inside a crunch job.
        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.submit = False
        args.debug = True
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.cwl_runner_job = {
            "uuid": arvados.current_job()["uuid"],
            "state": arvados.current_job()["state"]
        }
        outputObj = runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        # Workflow errors are expected failures and get a short log line;
        # anything else is logged with a full traceback.
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        # Record whatever partial output exists, then mark this task as
        # finished unsuccessfully so crunch does not retry it silently.
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash(
            )
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress': 1.0
                               }).execute()
Ejemplo n.º 19
0
def main(args,
         stdout,
         stderr,
         api_client=None,
         keep_client=None,
         install_sig_handlers=True):
    """Command-line entry point for the Arvados CWL runner.

    Parses command-line arguments, constructs (or accepts injected) API
    and Keep clients, builds an ArvCwlExecutor, configures logging, and
    delegates execution to cwltool's main driver.

    Arguments:
      args: command-line argument list (without the program name).
      stdout, stderr: streams passed through to cwltool.main.main.
      api_client: optional pre-built Arvados API client (for testing).
      keep_client: optional pre-built Keep client (for testing).
      install_sig_handlers: install Arvados signal handlers when True.

    Returns 1 on early failure; otherwise whatever cwltool.main.main
    returns.
    """
    parser = arg_parser()

    job_order_object = None
    arvargs = parser.parse_args(args)

    # Only a single storage class is supported at the moment.
    if len(arvargs.storage_classes.strip().split(',')) > 1:
        logger.error(
            str(u"Multiple storage classes are not supported currently."))
        return 1

    # Forced defaults: every step runs in a container, with relaxed
    # path checks, and version probing disabled.
    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.print_supported_versions = False

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    # Infer the API flavor from the UUID infix of the workflow being
    # updated: "-7fd4e-" (workflow object) implies the containers API,
    # "-p5p6p-" (pipeline template) implies the jobs API. The infix
    # starts at offset 5 in a well-formed Arvados UUID.
    if arvargs.update_workflow:
        if arvargs.update_workflow.find('-7fd4e-') == 5:
            want_api = 'containers'
        elif arvargs.update_workflow.find('-p5p6p-') == 5:
            want_api = 'jobs'
        else:
            want_api = None
        if want_api and arvargs.work_api and want_api != arvargs.work_api:
            logger.error(
                str(u'--update-workflow arg {!r} uses {!r} API, but --api={!r} specified'
                    ).format(arvargs.update_workflow, want_api,
                             arvargs.work_api))
            return 1
        arvargs.work_api = want_api

    # Creating/updating a workflow definition needs no input object;
    # supply an empty job order so cwltool does not prompt for one.
    if (arvargs.create_workflow
            or arvargs.update_workflow) and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    # Backfill any cwltool defaults the Arvados argument parser does
    # not define itself.
    for key, val in viewitems(cwltool.argparser.get_default_args()):
        if not hasattr(arvargs, key):
            setattr(arvargs, key, val)

    try:
        if api_client is None:
            # Thread-safe client cache: cwltool may issue API calls from
            # worker threads. OrderedJsonModel preserves response key
            # order.
            api_client = arvados.safeapi.ThreadSafeApiCache(
                api_params={
                    "model": OrderedJsonModel(),
                    "timeout": arvargs.http_timeout
                },
                keep_params={"num_retries": 4})
            keep_client = api_client.keep
            # Make an API object now so errors are reported early.
            api_client.users().current().execute()
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client,
                                                  num_retries=4)
        executor = ArvCwlExecutor(api_client,
                                  arvargs,
                                  keep_client=keep_client,
                                  num_retries=4)
    except Exception as e:
        logger.error(e)
        return 1

    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('arvados').setLevel(logging.DEBUG)

    # NOTE: --quiet is applied after --debug, so it takes precedence if
    # both flags are given.
    if arvargs.quiet:
        logger.setLevel(logging.WARN)
        logging.getLogger('arvados').setLevel(logging.WARN)
        logging.getLogger('arvados.arv-run').setLevel(logging.WARN)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    # Optionally prefix every log line with a timestamp.
    if arvargs.log_timestamps:
        arvados.log_handler.setFormatter(
            logging.Formatter(
                '%(asctime)s %(name)s %(levelname)s: %(message)s',
                '%Y-%m-%d %H:%M:%S'))
    else:
        arvados.log_handler.setFormatter(
            logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Hand off to cwltool's driver with the Arvados executor and
    # contexts plugged in.
    return cwltool.main.main(args=arvargs,
                             stdout=stdout,
                             stderr=stderr,
                             executor=executor.arv_executor,
                             versionfunc=versionstring,
                             job_order_object=job_order_object,
                             logger_handler=arvados.log_handler,
                             custom_schema_callback=add_arv_hints,
                             loadingContext=executor.loadingContext,
                             runtimeContext=executor.runtimeContext)