Example #1
def main(args, stdout, stderr, api_client=None, keep_client=None,
         install_sig_handlers=True):
    parser = arg_parser()

    job_order_object = None
    arvargs = parser.parse_args(args)

    if len(arvargs.storage_classes.strip().split(',')) > 1:
        logger.error("Multiple storage classes are not supported currently.")
        return 1

    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.print_supported_versions = False

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    if arvargs.update_workflow:
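        # Arvados UUIDs look like "zzzzz-7fd4e-0123456789abcde": the
        # five-character type infix starts at offset 5, so find() == 5
        # identifies the object type ("7fd4e" is a workflow, "p5p6p" a
        # legacy pipeline template).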
        if arvargs.update_workflow.find('-7fd4e-') == 5:
            want_api = 'containers'
        elif arvargs.update_workflow.find('-p5p6p-') == 5:
            want_api = 'jobs'
        else:
            want_api = None
        if want_api and arvargs.work_api and want_api != arvargs.work_api:
            logger.error('--update-workflow arg {!r} uses {!r} API, but --api={!r} specified'.format(
                arvargs.update_workflow, want_api, arvargs.work_api))
            return 1
        arvargs.work_api = want_api

    if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    for key, val in viewitems(cwltool.argparser.get_default_args()):
        if not hasattr(arvargs, key):
            setattr(arvargs, key, val)
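    # The loop above backfills cwltool's argparse defaults for any
    # options our own parser doesn't define, so downstream cwltool code
    # can rely on every attribute being present.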

    try:
        if api_client is None:
            api_client = arvados.safeapi.ThreadSafeApiCache(
                api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout},
                keep_params={"num_retries": 4})
            keep_client = api_client.keep
            # Make an API object now so errors are reported early.
            api_client.users().current().execute()
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
        executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
    except Exception:
        logger.exception("Error creating the Arvados CWL Executor")
        return 1

    # Note that unless in debug mode, some stack traces related to user
    # workflow errors may be suppressed. See ArvadosJob.done().
    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('arvados').setLevel(logging.DEBUG)

    if arvargs.quiet:
        logger.setLevel(logging.WARNING)
        logging.getLogger('arvados').setLevel(logging.WARNING)
        logging.getLogger('arvados.arv-run').setLevel(logging.WARNING)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    if arvargs.log_timestamps:
        arvados.log_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S'))
    else:
        arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    return cwltool.main.main(args=arvargs,
                             stdout=stdout,
                             stderr=stderr,
                             executor=executor.arv_executor,
                             versionfunc=versionstring,
                             job_order_object=job_order_object,
                             logger_handler=arvados.log_handler,
                             custom_schema_callback=add_arv_hints,
                             loadingContext=executor.loadingContext,
                             runtimeContext=executor.runtimeContext)
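
if __name__ == "__main__":
    # Hypothetical entry-point sketch: pass the process arguments
    # through and exit with the status main() returns.
    import sys
    sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))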
Example #2
def main(args, stdout, stderr, api_client=None, keep_client=None,
         install_sig_handlers=True):
    parser = arg_parser()

    job_order_object = None
    arvargs = parser.parse_args(args)

    if len(arvargs.storage_classes.strip().split(',')) > 1:
        logger.error("Multiple storage classes are not supported currently.")
        return 1

    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.print_supported_versions = False

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    if arvargs.update_workflow:
        if arvargs.update_workflow.find('-7fd4e-') == 5:
            want_api = 'containers'
        else:
            want_api = None
        if want_api and arvargs.work_api and want_api != arvargs.work_api:
            logger.error('--update-workflow arg {!r} uses {!r} API, but --api={!r} specified'.format(
                arvargs.update_workflow, want_api, arvargs.work_api))
            return 1
        arvargs.work_api = want_api

    if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    for key, val in viewitems(cwltool.argparser.get_default_args()):
        if not hasattr(arvargs, key):
            setattr(arvargs, key, val)

    try:
        if api_client is None:
            api_client = arvados.safeapi.ThreadSafeApiCache(
                api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout},
                keep_params={"num_retries": 4})
            keep_client = api_client.keep
            # Make an API object now so errors are reported early.
            api_client.users().current().execute()
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
        executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
    except Exception:
        logger.exception("Error creating the Arvados CWL Executor")
        return 1

    # Note that unless in debug mode, some stack traces related to user
    # workflow errors may be suppressed.
    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('arvados').setLevel(logging.DEBUG)

    if arvargs.quiet:
        logger.setLevel(logging.WARNING)
        logging.getLogger('arvados').setLevel(logging.WARNING)
        logging.getLogger('arvados.arv-run').setLevel(logging.WARNING)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    if arvargs.log_timestamps:
        arvados.log_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S'))
    else:
        arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    if stdout is sys.stdout:
        # cwltool.main has code to work around encoding issues with
        # sys.stdout and unix pipes (they default to ASCII encoding,
        # we want utf-8), so when stdout is sys.stdout set it to None
        # to take advantage of that.  Don't override it for all cases
        # since we still want to be able to capture stdout for the
        # unit tests.
        stdout = None

    return cwltool.main.main(args=arvargs,
                             stdout=stdout,
                             stderr=stderr,
                             executor=executor.arv_executor,
                             versionfunc=versionstring,
                             job_order_object=job_order_object,
                             logger_handler=arvados.log_handler,
                             custom_schema_callback=add_arv_hints,
                             loadingContext=executor.loadingContext,
                             runtimeContext=executor.runtimeContext,
                             input_required=not (arvargs.create_workflow or arvargs.update_workflow))
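
# A minimal capture sketch (hypothetical, based on the stdout-handling
# note above): because the buffer is not sys.stdout, main() leaves it
# alone instead of swapping in None, so output stays capturable.
def _run_captured(argv):
    import io
    import sys
    out = io.StringIO()
    exit_code = main(argv, stdout=out, stderr=sys.stderr)
    return exit_code, out.getvalue()

# e.g. (hypothetical workflow and input file names):
#   _run_captured(["--local", "workflow.cwl", "inputs.yml"])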
Example #3
def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
         install_sig_handlers=True):
    global api_client

    args = parse_arguments(arguments)
    logger = logging.getLogger('arvados.arv_put')
    if args.silent:
        logger.setLevel(logging.WARNING)
    else:
        logger.setLevel(logging.INFO)
    status = 0

    request_id = arvados.util.new_request_id()

    formatter = ArvPutLogFormatter(request_id)
    logging.getLogger('arvados').handlers[0].setFormatter(formatter)

    if api_client is None:
        api_client = arvados.api('v1', request_id=request_id)

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    # Determine the name to use
    if args.name:
        if args.stream or args.raw:
            logger.error("Cannot use --name with --stream or --raw")
            sys.exit(1)
        elif args.update_collection:
            logger.error("Cannot use --name with --update-collection")
            sys.exit(1)
        collection_name = args.name
    else:
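        # Produces something like
        # "Saved at 2021-06-01 13:45:10 UTC by alice@workstation"
        # (illustrative values).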
        collection_name = "Saved at {} by {}@{}".format(
            datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
            pwd.getpwuid(os.getuid()).pw_name,
            socket.gethostname())

    if args.project_uuid and (args.stream or args.raw):
        logger.error("Cannot use --project-uuid with --stream or --raw")
        sys.exit(1)

    # Determine the parent project
    try:
        project_uuid = desired_project_uuid(api_client, args.project_uuid,
                                            args.retries)
    except (apiclient_errors.Error, ValueError) as error:
        logger.error(error)
        sys.exit(1)

    if args.progress:
        reporter = progress_writer(human_progress)
    elif args.batch_progress:
        reporter = progress_writer(machine_progress)
    else:
        reporter = None

    # Split the storage-classes argument
    storage_classes = None
    if args.storage_classes:
        storage_classes = args.storage_classes.strip().split(',')
        if len(storage_classes) > 1:
            logger.error("Multiple storage classes are not supported currently.")
            sys.exit(1)


    # Setup exclude regex from all the --exclude arguments provided
    name_patterns = []
    exclude_paths = []
    exclude_names = None
    if len(args.exclude) > 0:
        # We're supporting 2 kinds of exclusion patterns:
        # 1)   --exclude '*.jpg'    (file/dir name patterns, will only match
        #                            the name, wherever the file is on the tree)
        # 2.1) --exclude 'foo/bar'  (file/dir path patterns, will match the
        #                            entire path, and should be relative to
        #                            any input dir argument)
        # 2.2) --exclude './*.jpg'  (Special case for excluding files/dirs
        #                            placed directly underneath the input dir)
        for p in args.exclude:
            # Only relative path patterns are allowed
            if p.startswith(os.sep):
                logger.error("Cannot use absolute paths with --exclude")
                sys.exit(1)
            if os.path.dirname(p):
                # We don't support path patterns with '..'
                p_parts = p.split(os.sep)
                if '..' in p_parts:
                    logger.error(
                        "Cannot use path patterns that include or '..'")
                    sys.exit(1)
                # Path search pattern
                exclude_paths.append(p)
            else:
                # Name-only search pattern
                name_patterns.append(p)
        # For name only matching, we can combine all patterns into a single
        # regexp, for better performance.
        exclude_names = re.compile('|'.join(
            [fnmatch.translate(p) for p in name_patterns]
        )) if len(name_patterns) > 0 else None
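        # For example, name_patterns ['*.jpg', '*~'] compiles to a
        # regexp roughly like r'(?s:.*\.jpg)\Z|(?s:.*~)\Z' (the exact
        # form depends on the Python version's fnmatch.translate).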
        # Show the user the patterns to be used, just in case they weren't
        # specified inside quotes and got changed by the shell expansion.
        logger.info("Exclude patterns: {}".format(args.exclude))

    # If this is used by a human, and there's at least one directory to be
    # uploaded, the expected bytes calculation can take a moment.
    if args.progress and any(os.path.isdir(f) for f in args.paths):
        logger.info("Calculating upload size, this could take some time...")
    try:
        writer = ArvPutUploadJob(paths=args.paths,
                                 resume=args.resume,
                                 use_cache=args.use_cache,
                                 filename=args.filename,
                                 reporter=reporter,
                                 api_client=api_client,
                                 num_retries=args.retries,
                                 replication_desired=args.replication,
                                 put_threads=args.threads,
                                 name=collection_name,
                                 owner_uuid=project_uuid,
                                 ensure_unique_name=True,
                                 update_collection=args.update_collection,
                                 storage_classes=storage_classes,
                                 logger=logger,
                                 dry_run=args.dry_run,
                                 follow_links=args.follow_links,
                                 exclude_paths=exclude_paths,
                                 exclude_names=exclude_names)
    except ResumeCacheConflict:
        logger.error("\n".join([
            "arv-put: Another process is already uploading this data.",
            "         Use --no-cache if this is really what you want."]))
        sys.exit(1)
    except ResumeCacheInvalidError:
        logger.error("\n".join([
            "arv-put: Resume cache contains invalid signature: it may have expired",
            "         or been created with another Arvados user's credentials.",
            "         Switch user or use one of the following options to restart upload:",
            "         --no-resume to start a new resume cache.",
            "         --no-cache to disable resume cache."]))
        sys.exit(1)
    except CollectionUpdateError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)
    except ArvPutUploadIsPending:
        # Dry run check successful, return proper exit code.
        sys.exit(2)
    except ArvPutUploadNotPending:
        # No files pending for upload
        sys.exit(0)
    except PathDoesNotExistError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)

    if not args.dry_run and not args.update_collection and args.resume and writer.bytes_written > 0:
        logger.warning("\n".join([
            "arv-put: Resuming previous upload from last checkpoint.",
            "         Use the --no-resume option to start over."]))

    if not args.dry_run:
        writer.report_progress()
    output = None
    try:
        writer.start(save_collection=not (args.stream or args.raw))
    except arvados.errors.ApiError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)

    if args.progress:  # Print newline to split stderr from stdout for humans.
        logger.info("\n")

    if args.stream:
        if args.normalize:
            output = writer.manifest_text(normalize=True)
        else:
            output = writer.manifest_text()
    elif args.raw:
        output = ','.join(writer.data_locators())
    else:
        try:
            if args.update_collection:
                logger.info("Collection updated: '{}'".format(writer.collection_name()))
            else:
                logger.info("Collection saved as '{}'".format(writer.collection_name()))
            if args.portable_data_hash:
                output = writer.portable_data_hash()
            else:
                output = writer.manifest_locator()
        except apiclient_errors.Error as error:
            logger.error(
                "arv-put: Error creating Collection on project: {}.".format(
                    error))
            status = 1

    # Print the locator (uuid) of the new collection.
    if output is None:
        status = status or 1
    elif not args.silent:
        stdout.write(output)
        if not output.endswith('\n'):
            stdout.write('\n')

    if install_sig_handlers:
        arv_cmd.restore_signal_handlers()

    if status != 0:
        sys.exit(status)

    # Success!
    return output
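
if __name__ == '__main__':
    # Hypothetical entry-point sketch: with no argument list given,
    # parse_arguments() falls back to sys.argv.
    main()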
Example #4
def main(args,
         stdout,
         stderr,
         api_client=None,
         keep_client=None,
         install_sig_handlers=True):
    parser = arg_parser()

    job_order_object = None
    arvargs = parser.parse_args(args)

    if len(arvargs.storage_classes.strip().split(',')) > 1:
        logger.error(
            "Multiple storage classes are not supported currently.")
        return 1

    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.print_supported_versions = False

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    if arvargs.update_workflow:
        if arvargs.update_workflow.find('-7fd4e-') == 5:
            want_api = 'containers'
        elif arvargs.update_workflow.find('-p5p6p-') == 5:
            want_api = 'jobs'
        else:
            want_api = None
        if want_api and arvargs.work_api and want_api != arvargs.work_api:
            logger.error(
                '--update-workflow arg {!r} uses {!r} API, but --api={!r} specified'.format(
                    arvargs.update_workflow, want_api, arvargs.work_api))
            return 1
        arvargs.work_api = want_api

    if (arvargs.create_workflow
            or arvargs.update_workflow) and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    for key, val in viewitems(cwltool.argparser.get_default_args()):
        if not hasattr(arvargs, key):
            setattr(arvargs, key, val)

    try:
        if api_client is None:
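            # OrderedJsonModel keeps JSON object keys in the order the
            # server sent them; http_timeout presumably maps to an
            # --http-timeout command line option.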
            api_client = arvados.safeapi.ThreadSafeApiCache(
                api_params={
                    "model": OrderedJsonModel(),
                    "timeout": arvargs.http_timeout
                },
                keep_params={"num_retries": 4})
            keep_client = api_client.keep
            # Make an API object now so errors are reported early.
            api_client.users().current().execute()
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client,
                                                  num_retries=4)
        executor = ArvCwlExecutor(api_client,
                                  arvargs,
                                  keep_client=keep_client,
                                  num_retries=4)
    except Exception:
        logger.exception("Error creating the Arvados CWL Executor")
        return 1

    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('arvados').setLevel(logging.DEBUG)

    if arvargs.quiet:
        logger.setLevel(logging.WARNING)
        logging.getLogger('arvados').setLevel(logging.WARNING)
        logging.getLogger('arvados.arv-run').setLevel(logging.WARNING)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    if arvargs.log_timestamps:
        arvados.log_handler.setFormatter(
            logging.Formatter(
                '%(asctime)s %(name)s %(levelname)s: %(message)s',
                '%Y-%m-%d %H:%M:%S'))
    else:
        arvados.log_handler.setFormatter(
            logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    return cwltool.main.main(args=arvargs,
                             stdout=stdout,
                             stderr=stderr,
                             executor=executor.arv_executor,
                             versionfunc=versionstring,
                             job_order_object=job_order_object,
                             logger_handler=arvados.log_handler,
                             custom_schema_callback=add_arv_hints,
                             loadingContext=executor.loadingContext,
                             runtimeContext=executor.runtimeContext)
Example #5
def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
         install_sig_handlers=True):
    global api_client

    args = parse_arguments(arguments)
    logger = logging.getLogger('arvados.arv_put')
    if args.silent:
        logger.setLevel(logging.WARNING)
    else:
        logger.setLevel(logging.INFO)
    status = 0

    request_id = arvados.util.new_request_id()

    formatter = ArvPutLogFormatter(request_id)
    logging.getLogger('arvados').handlers[0].setFormatter(formatter)

    if api_client is None:
        api_client = arvados.api('v1', request_id=request_id)

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    # Determine the name to use
    if args.name:
        if args.stream or args.raw:
            logger.error("Cannot use --name with --stream or --raw")
            sys.exit(1)
        elif args.update_collection:
            logger.error("Cannot use --name with --update-collection")
            sys.exit(1)
        collection_name = args.name
    else:
        collection_name = "Saved at {} by {}@{}".format(
            datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
            pwd.getpwuid(os.getuid()).pw_name,
            socket.gethostname())

    if args.project_uuid and (args.stream or args.raw):
        logger.error("Cannot use --project-uuid with --stream or --raw")
        sys.exit(1)

    # Determine the parent project
    try:
        project_uuid = desired_project_uuid(api_client, args.project_uuid,
                                            args.retries)
    except (apiclient_errors.Error, ValueError) as error:
        logger.error(error)
        sys.exit(1)

    if args.progress:
        reporter = progress_writer(human_progress)
    elif args.batch_progress:
        reporter = progress_writer(machine_progress)
    else:
        reporter = None

    # Split the storage-classes argument
    storage_classes = None
    if args.storage_classes:
        storage_classes = args.storage_classes.strip().split(',')
        if len(storage_classes) > 1:
            logger.error("Multiple storage classes are not supported currently.")
            sys.exit(1)


    # Setup exclude regex from all the --exclude arguments provided
    name_patterns = []
    exclude_paths = []
    exclude_names = None
    if len(args.exclude) > 0:
        # We're supporting 2 kinds of exclusion patterns:
        # 1)   --exclude '*.jpg'    (file/dir name patterns, will only match
        #                            the name, wherever the file is on the tree)
        # 2.1) --exclude 'foo/bar'  (file/dir path patterns, will match the
        #                            entire path, and should be relative to
        #                            any input dir argument)
        # 2.2) --exclude './*.jpg'  (Special case for excluding files/dirs
        #                            placed directly underneath the input dir)
        for p in args.exclude:
            # Only relative path patterns are allowed
            if p.startswith(os.sep):
                logger.error("Cannot use absolute paths with --exclude")
                sys.exit(1)
            if os.path.dirname(p):
                # We don't support path patterns with '..'
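                # e.g. 'foo/../bar' is rejected below, while 'foo/bar'
                # is kept as a path pattern.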
                p_parts = p.split(os.sep)
                if '..' in p_parts:
                    logger.error(
                        "Cannot use path patterns that include or '..'")
                    sys.exit(1)
                # Path search pattern
                exclude_paths.append(p)
            else:
                # Name-only search pattern
                name_patterns.append(p)
        # For name only matching, we can combine all patterns into a single
        # regexp, for better performance.
        exclude_names = re.compile('|'.join(
            [fnmatch.translate(p) for p in name_patterns]
        )) if len(name_patterns) > 0 else None
        # Show the user the patterns to be used, just in case they weren't
        # specified inside quotes and got changed by the shell expansion.
        logger.info("Exclude patterns: {}".format(args.exclude))

    # If this is used by a human, and there's at least one directory to be
    # uploaded, the expected bytes calculation can take a moment.
    if args.progress and any(os.path.isdir(f) for f in args.paths):
        logger.info("Calculating upload size, this could take some time...")
    try:
        writer = ArvPutUploadJob(paths=args.paths,
                                 resume=args.resume,
                                 use_cache=args.use_cache,
                                 filename=args.filename,
                                 reporter=reporter,
                                 api_client=api_client,
                                 num_retries=args.retries,
                                 replication_desired=args.replication,
                                 put_threads=args.threads,
                                 name=collection_name,
                                 owner_uuid=project_uuid,
                                 ensure_unique_name=True,
                                 update_collection=args.update_collection,
                                 storage_classes=storage_classes,
                                 logger=logger,
                                 dry_run=args.dry_run,
                                 follow_links=args.follow_links,
                                 exclude_paths=exclude_paths,
                                 exclude_names=exclude_names)
    except ResumeCacheConflict:
        logger.error("\n".join([
            "arv-put: Another process is already uploading this data.",
            "         Use --no-cache if this is really what you want."]))
        sys.exit(1)
    except CollectionUpdateError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)
    except ArvPutUploadIsPending:
        # Dry run check successful, return proper exit code.
        sys.exit(2)
    except ArvPutUploadNotPending:
        # No files pending for upload
        sys.exit(0)
    except PathDoesNotExistError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)

    if not args.dry_run and not args.update_collection and args.resume and writer.bytes_written > 0:
        logger.warning("\n".join([
            "arv-put: Resuming previous upload from last checkpoint.",
            "         Use the --no-resume option to start over."]))

    if not args.dry_run:
        writer.report_progress()
    output = None
    try:
        writer.start(save_collection=not (args.stream or args.raw))
    except arvados.errors.ApiError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)

    if args.progress:  # Print newline to split stderr from stdout for humans.
        logger.info("\n")

    if args.stream:
        if args.normalize:
            output = writer.manifest_text(normalize=True)
        else:
            output = writer.manifest_text()
    elif args.raw:
        output = ','.join(writer.data_locators())
    else:
        try:
            if args.update_collection:
                logger.info("Collection updated: '{}'".format(writer.collection_name()))
            else:
                logger.info("Collection saved as '{}'".format(writer.collection_name()))
            if args.portable_data_hash:
                output = writer.portable_data_hash()
            else:
                output = writer.manifest_locator()
        except apiclient_errors.Error as error:
            logger.error(
                "arv-put: Error creating Collection on project: {}.".format(
                    error))
            status = 1

    # Print the locator (uuid) of the new collection.
    if output is None:
        status = status or 1
    elif not args.silent:
        stdout.write(output)
        if not output.endswith('\n'):
            stdout.write('\n')

    if install_sig_handlers:
        arv_cmd.restore_signal_handlers()

    if status != 0:
        sys.exit(status)

    # Success!
    return output