예제 #1
0
def dispatch(args):
    """Dispatch other commands to the appropriate model provided script.

    The script ``<cmd>.<ext>`` is looked up in the model's package
    directory and run through the interpreter returned by
    ``utils.interpreter`` (inside the model's conda environment when one
    is configured).  Recognised failures reported on the script's stderr
    are turned into mlhub exceptions.

    Args:
        args: Parsed command line namespace.  The attributes read here are
            ``cmd``, ``model``, ``param`` (a list of strings),
            ``workding_dir`` (sic -- the attribute name used by this
            version of the argument parser) and ``quiet``.

    Raises:
        utils.CommandNotFoundException: The model declares no commands,
            ``cmd`` is not one of them, or the script file is missing.
        utils.LackDependencyException: The script failed because a Python
            module or an R package is missing.
        utils.DataResourceNotFoundException: The script's stderr mentions
            ``mlhub.utils.DataResourceNotFoundException``.

    Exits the process with status 1 when a graphic display is required,
    none is available, and the user declines to continue.
    """

    cmd = args.cmd
    model = args.model
    path = utils.get_package_dir(model)

    param = " ".join(args.param)

    # Get working dir if any.
    #
    # An explicit value is remembered for the model (an empty string
    # means "no working dir"); otherwise fall back to the remembered one.
    # The resolved value is written back under both spellings: the
    # original stored the fallback in `args.working_dir` but later read
    # `args.workding_dir`, so a remembered working dir was never applied.

    working_dir = args.workding_dir
    if working_dir is not None:
        utils.update_working_dir(model, working_dir)
        if working_dir == '':
            working_dir = None
    else:
        working_dir = utils.get_working_dir(model)
    args.workding_dir = working_dir
    args.working_dir = working_dir

    # Get conda environment name if any.

    conda_env_name = utils.get_conda_env_name(model)

    # Check that the model is installed and has commands.

    utils.check_model_installed(model)

    entry = utils.load_description(model)

    # A missing, empty or null 'commands' entry all mean "no commands".

    if not entry.get('commands'):
        raise utils.CommandNotFoundException(cmd, model)

    # Correct misspelled command if possible.

    matched_cmd = utils.get_misspelled_command(cmd, list(entry['commands']))
    if matched_cmd is not None:
        cmd = matched_cmd

    # Check if cmd needs to use graphic display indicated in DESCRIPTION.yaml.

    meta = entry['meta']
    if 'display' in meta and cmd in meta['display'] and os.environ.get(
            'DISPLAY', '') == '':
        msg = "Graphic display is required but not available for command '{}'. Continue"
        yes = utils.yes_or_no(msg, cmd, yes=False)
        if not yes:
            msg = """
To enable DISPLAY be sure to connect to the server using 'ssh -X'
or else connect to the server's desktop using a local X server like X2Go.

"""
            sys.stdout.write(msg)
            sys.exit(1)

    # Obtain the default/chosen language for the package.

    lang = meta["languages"]

    # Deal with malformed 'languages' field: any substring of a known
    # language name (e.g. "py", "Python"[:2]) maps to its file extension.

    lang_opts = {"python": "py", "R": "R"}
    for name, extension in lang_opts.items():
        if lang in name:
            lang = extension
            break

    # Obtain the specified script file.

    script = cmd + "." + lang

    logger = logging.getLogger(__name__)
    logger.debug("Execute the script: " + os.path.join(path, script))

    if cmd not in entry['commands'] or not os.path.exists(
            os.path.join(path, script)):
        raise utils.CommandNotFoundException(cmd, model)

    # Determine the interpreter to use
    #
    # .R => Rscript; .py => python, etc.

    interpreter = utils.interpreter(script)

    # Change working dir if needed: run from the working dir and refer
    # to the script by its full path inside the package dir.

    if working_dir is not None:
        script = os.path.join(path, script)
        path = working_dir

    # _MLHUB_CMD_CWD: an environment variable indicating the current
    #                 working directory where command `ml xxx` is invoked.
    # _MLHUB_MODEL_NAME: env variable indicating the name of the model.
    #
    # The above two env vars can be obtained by helper functions, such
    # as utils.get_cmd_cwd().  Model package developers should use the
    # helper functions instead of the env vars directly.
    #
    # NOTE(review): the values are interpolated into a shell string
    # without escaping; a path or parameter containing quotes will break
    # the command line.

    command = "export _MLHUB_CMD_CWD='{}'; export _MLHUB_MODEL_NAME='{}'; {} {} {}".format(
        os.getcwd(), model, interpreter, script, param)

    # Run script inside conda environment if specified

    if conda_env_name is not None:
        command = 'bash -c "source activate {}; {}"'.format(
            conda_env_name, command)

    logger.debug("(cd " + path + "; " + command + ")")

    # Only stderr is captured; the script's stdout goes straight to the
    # caller's terminal.

    proc = subprocess.Popen(command,
                            shell=True,
                            cwd=path,
                            stderr=subprocess.PIPE)
    _, errors = proc.communicate()
    missing_r_dep = False
    if proc.returncode != 0:
        # Never let an undecodable byte hide the real error report.
        errors = errors.decode("utf-8", errors="replace")

        # Check if it is Python dependency unsatisfied

        dep_required = re.search(
            r"ModuleNotFoundError: No module named '(.*)'", errors)

        # Check if R dependency unsatisfied

        if dep_required is None:
            dep_required = re.search(
                r"there is no package called ‘(.*)’", errors)
            if dep_required is not None:
                missing_r_dep = True

        # Check if required data resource not found

        data_required = re.search(
            r"mlhub.utils.DataResourceNotFoundException", errors)

        if dep_required is not None:  # Dependency unsatisfied
            dep_required = dep_required.group(1)
            logger.error("Dependency unsatisfied: {}\n{}".format(
                dep_required, errors))
            raise utils.LackDependencyException(dep_required, missing_r_dep,
                                                model)
        elif data_required is not None:  # Data not found
            raise utils.DataResourceNotFoundException()
        else:  # Other unknown errors
            print("An error was encountered:\n")
            print(errors)

    else:
        # Suggest next step

        if not args.quiet:
            utils.print_next_step(cmd, description=entry, model=model)
예제 #2
0
def configure_model(args):
    """Configure mlhub itself, or the dependencies of an installed model.

    Without ``args.model`` the bundled ``scripts/dep/mlhub.sh`` is run
    (only on Debian/Ubuntu) and the function returns.  With a model, each
    dependency group found in the model's description is passed to the
    matching ``utils.install_*`` helper, after which ``configure.sh``,
    ``configure.R`` and ``configure.py`` are each handed to
    ``utils.configure``.

    Args:
        args: Parsed command line namespace; ``model``, ``quiet`` and
            ``workding_dir`` (sic) are read.

    Raises:
        utils.ConfigureFailedException: The mlhub set-up script exited
            with a non-zero status.
    """

    # TODO: Support additional configuration beyond what MLHUB.yaml lists.
    # TODO: On failure, report the failed dependency together with the
    #       installed and the not-yet-installed ones.
    # TODO: Support specifying package versions.
    # TODO: More informative messages per kind of dependency.
    #
    # Design notes kept from the original author:
    #
    # 1 Construct an mlhub container (from Ubuntu) as a known starting point.
    # 2 Assume the user is on a DSVM with a free Azure account to test out.
    # 3 Read `dependencies:` and `language:` and install as required.
    # 4 A packager-supplied configure.R could override everything, but the
    #   preference is to list R/python packages under `dependencies:`.
    #
    # Sketch:
    #
    #   if file.exists(configure.XX):
    #     XX configure.XX
    #   else if language: == "Rscript":
    #     install whichever of `dependencies:` is not in installed.packages()
    #   else if language: == "python":
    #     write `dependencies:` to requirements.txt
    #     pip install -r requirements.txt

    if not args.model:

        # No model named: configure MLHUB per se (bash completion and
        # system pre-requisites).

        if distro.id() in ['debian', 'ubuntu']:
            setup_script = os.path.join('scripts', 'dep', 'mlhub.sh')
            proc = subprocess.Popen('/bin/bash {}'.format(setup_script),
                                    shell=True,
                                    cwd=os.path.dirname(__file__),
                                    stderr=subprocess.PIPE)
            _, stderr_bytes = proc.communicate()
            if proc.returncode != 0:
                report = stderr_bytes.decode("utf-8")
                print("\nAn error was encountered:\n")
                print(report)
                raise utils.ConfigureFailedException()

        return

    # ----- Model package configuration -----

    # Correct model name if possible.

    model = args.model
    corrected = utils.get_misspelled_pkg(model)
    if corrected is not None:
        model = corrected

    pkg_dir = utils.get_package_dir(model)

    # The model package must already be installed.

    utils.check_model_installed(model)

    # Locate the dependency spec: top level first, then under 'meta'.

    entry = utils.load_description(model)
    if 'dependencies' in entry:
        depspec = entry['dependencies']
    elif 'dependencies' in entry['meta']:
        depspec = entry['meta']['dependencies']
    else:
        depspec = None

    if depspec is not None:
        for spec in utils.flatten_mlhubyaml_deps(depspec):
            category, deplist = spec[0][-1], spec[1]

            # Category -> action
            #
            #   None             by entry['meta']['languages']:
            #                      R      -> install.packages(xxx) from cran
            #                      Python -> pip install xxx
            #   system, sh       apt-get install
            #   r, cran          install.packages(xxx) from cran (version ok)
            #   cran-2018-12-01  install cran snapshot on 2018-12-01
            #   github           devtools::install_github
            #   python           apt-get install python-xxx
            #   python3          apt-get install python3-xxx
            #   pip / pip3       pip install / pip3 install
            #   conda            conda install
            #   files            download files

            # Uncategorised: decide by the package language.
            if category is None:
                lang = entry['meta']['languages'].lower()
                if lang == 'r':
                    utils.install_r_deps(deplist, model, source='cran')
                elif 'python'.startswith(lang):
                    utils.install_python_deps(deplist, model, source='pip')
                continue

            # System deps (any prefix of 'shell', e.g. 'sh', also counts).
            if category == 'system' or 'shell'.startswith(category):
                utils.install_system_deps(deplist)
                continue

            # R deps.
            if category == 'r':
                utils.install_r_deps(deplist, model, source='cran')
                continue

            if category in ('cran', 'github') or category.startswith('cran-'):
                utils.install_r_deps(deplist, model, source=category)
                continue

            # Python deps.
            if category.startswith(('python', 'pip')) or category == 'conda':
                utils.install_python_deps(deplist, model, source=category)
                continue

            # Files.
            if 'files'.startswith(category):
                utils.install_file_deps(deplist, model)

    # Run additional configure scripts if any.
    #
    # NOTE(review): the first result is or-ed with True, so the flag is
    # always truthy and the "no configuration" report below is currently
    # never printed.  Kept as is; confirm whether that is intended.

    configured = utils.configure(pkg_dir, "configure.sh", args.quiet) or True
    for script_name in ("configure.R", "configure.py"):
        configured = utils.configure(pkg_dir, script_name,
                                     args.quiet) or configured

    if not configured:
        if depspec is None:
            print("No configuration provided (maybe none is required).")
        else:
            msg = ("No configuration script provided for this model. "
                   "The following dependencies are required:\n")
            print(msg)
            print(yaml.dump(depspec))

    # Remember the working dir if one was given.

    if args.workding_dir is not None:
        utils.update_working_dir(model, args.workding_dir)

    # Suggest next step.

    if not args.quiet:
        utils.print_next_step('configure', model=model)
예제 #3
0
def install_model(args):
    """Install a model.

    The package is located (mlhub repo index, GitHub reference, URL or
    local archive), downloaded and unpacked into a temporary directory if
    needed, and then moved into the directory returned by
    ``utils.get_package_dir``.  An existing installation is removed after
    the user confirms the downgrade/replace/upgrade.

    Args:
        args: Command line args parsed by argparse.
        args.model (str): mlm/zip path, mlm/zip url, model name, GitHub repo,
                          like mlhubber/mlhub, or MLHUB.yaml on github repo,
                          like mlhubber/audit:doc/MLHUB.yaml.

    Other attributes read from ``args``: ``mlhub``, ``quiet`` and
    ``workding_dir`` (sic).

    Raises:
        utils.ModePkgInstallationFileNotFoundException: Re-raised, after
            the partially created install directory is removed, when a
            file listed in MLHUB.yaml cannot be installed.

    Exits the process with status 0 when the user declines to replace an
    existing installation.
    """

    logger = logging.getLogger(__name__)
    logger.info('Install a model.')
    logger.debug('args: {}'.format(args))

    model = args.model  # model pkg name
    location = args.model  # pkg file path or URL
    version = None  # model pkg version
    mlhubyaml = None  # MLHUB.yaml path or URL

    # Obtain the model URL if not a local file.

    if not utils.is_archive(model) and not utils.is_url(
            model) and '/' not in model:

        # Model package name, which can be found in mlhub repo.
        # Like:
        #     $ ml install audit
        #
        # We assume the URL got from mlhub repo is a link to a mlm/zip/tar file
        # or a GitHub repo reference or MLHUB.yaml.

        # Correct model name if possible.

        matched_model = utils.get_misspelled_pkg(model)
        if matched_model is not None:
            model = matched_model

        # Get model pkg meta data from mlhub repo.

        location, version, meta_list = utils.get_model_info_from_repo(
            model, args.mlhub)

        # Update bash completion list.

        utils.update_model_completion({e['meta']['name'] for e in meta_list})

    if not utils.is_archive(location):

        # Model from GitHub.
        # Like:
        #     $ ml install mlhubber/audit
        #     $ ml install mlhubber/audit:doc/MLHUB.yaml
        #     $ ml install https://github.com/mlhubber/audit/...

        mlhubyaml = utils.get_pkgyaml_github_url(location)  # URL to MLHUB.yaml
        location = utils.get_githubrepo_zip_url(location)
        logger.debug("location: {}".format(location))
        logger.debug("mlhubyaml: {}".format(mlhubyaml))

    # Determine the path of downloaded/existing model package file

    pkgfile = None
    if utils.is_archive(location):
        pkgfile = os.path.basename(location)  # pkg file name
    elif utils.is_url(location):
        pkgfile = utils.get_url_filename(location)

    # Query archive type if not available from file name per se.

    while pkgfile is None or not utils.is_archive(pkgfile):
        print(
            "The file type cannot be determined.\n"
            "Please give it a file name with explicit valid archive extension: ",
            end='')
        pkgfile = input()

    uncompressdir = pkgfile[:pkgfile.rfind(
        '.')]  # Dir where pkg file is extracted

    # Installation.

    entry = None  # Meta info read from MLHUB.yaml
    with tempfile.TemporaryDirectory() as mlhubtmpdir:

        # Determine the local path of the model package

        if utils.is_url(location):
            local = os.path.join(mlhubtmpdir, pkgfile)  # downloaded
        else:
            local = location  # local file path

        uncompressdir = os.path.join(mlhubtmpdir, uncompressdir)

        # Obtain model version.

        if version is None:
            if utils.ends_with_mlm(
                    pkgfile):  # Get version number from MLM file name.

                model, version = utils.interpret_mlm_name(pkgfile)

            elif not utils.is_github_url(
                    location):  # Get MLHUB.yaml inside the archive file.

                if utils.is_url(
                        location
                ):  # Download the package file because it is not from GitHub.
                    utils.download_model_pkg(location, local, pkgfile,
                                             args.quiet)

                if not args.quiet:
                    print("Extracting '{}' ...\n".format(pkgfile))

                utils.unpack_with_promote(local,
                                          uncompressdir,
                                          valid_name=pkgfile)
                mlhubyaml = utils.get_available_pkgyaml(
                    uncompressdir)  # Path to MLHUB.yaml

            if mlhubyaml is not None:  # Get version number from MLHUB.yaml
                entry = utils.read_mlhubyaml(mlhubyaml)
                meta = entry["meta"]
                model = meta["name"]
                version = meta["version"]

            utils.update_model_completion({model
                                           })  # Update bash completion list.

        # Check if model is already installed.

        install_path = utils.get_package_dir(model)  # Installation path
        if os.path.exists(install_path):
            installed_version = utils.load_description(
                model)['meta']['version']

            # Ensure version number is string.

            installed_version = str(installed_version)
            version = str(version)

            if StrictVersion(installed_version) > StrictVersion(version):
                yes = utils.yes_or_no(
                    "Downgrade '{}' from version '{}' to version '{}'",
                    model,
                    installed_version,
                    version,
                    yes=True)
            elif StrictVersion(installed_version) == StrictVersion(version):
                yes = utils.yes_or_no(
                    "Replace '{}' version '{}' with version '{}'",
                    model,
                    installed_version,
                    version,
                    yes=True)
            else:
                yes = utils.yes_or_no(
                    "Upgrade '{}' from version '{}' to version '{}'",
                    model,
                    installed_version,
                    version,
                    yes=True)

            if not yes:
                sys.exit(0)
            else:
                print()

            shutil.rmtree(install_path)

        # Uncompress package file.

        if not os.path.exists(
                uncompressdir
        ):  # Model pkg mlm or GitHub pkg has not unzipped yet.
            if utils.is_url(location):  # Download the package file if needed.
                utils.download_model_pkg(location, local, pkgfile, args.quiet)

            if not args.quiet:
                print("Extracting '{}' ...\n".format(pkgfile))

            utils.unpack_with_promote(local, uncompressdir, valid_name=pkgfile)

        # Install package files.
        #
        # Because it is time-consuming to download all package files one-by-one, we
        # download the whole zipball from the repo first, then re-arrange the files
        # according to `dependencies` -> `files` in MLHUB.yaml if any.

        # Find if any files specified in MLHUB.yaml
        #
        # The description has not been read yet in two cases:
        #   * an MLM file, whose version came from its file name, so
        #     MLHUB.yaml has to be found inside the unpacked archive;
        #   * a package whose version came from the mlhub repo index, where
        #     the MLHUB.yaml location may already be known (GitHub URL).
        # The second case used to leave `entry` as None and crash on the
        # membership test below.

        if entry is None:
            if mlhubyaml is None:
                mlhubyaml = utils.get_available_pkgyaml(uncompressdir)
            entry = utils.read_mlhubyaml(mlhubyaml)

        depspec = None
        if 'dependencies' in entry:
            depspec = entry['dependencies']
        elif 'dependencies' in entry['meta']:
            depspec = entry['meta']['dependencies']

        file_spec = None
        if depspec is not None and 'files' in depspec:
            file_spec = {'files': depspec['files']}
        elif 'files' in entry:
            file_spec = {'files': entry['files']}

        if file_spec is not None:  # install package files if they are specified in MLHUB.yaml

            # MLHUB.yaml should always be at the package root.

            os.mkdir(install_path)
            if utils.is_url(
                    mlhubyaml
            ):  # We currently only support MLHUB.yaml specified on GitHub.
                if mlhubyaml.startswith("https://api"):
                    # The API response is JSON carrying the raw file's
                    # 'download_url'.
                    urllib.request.urlretrieve(
                        json.loads(urllib.request.urlopen(
                            mlhubyaml).read())['download_url'],
                        os.path.join(install_path, MLHUB_YAML))
                else:
                    urllib.request.urlretrieve(
                        mlhubyaml, os.path.join(install_path, MLHUB_YAML))
            else:
                shutil.move(mlhubyaml, install_path)

            # All package files except MLHUB.yaml should be specified in 'files' of MLHUB.yaml

            try:
                utils.install_file_deps(
                    utils.flatten_mlhubyaml_deps(file_spec)[0][1],
                    model,
                    downloadir=uncompressdir)
            except utils.ModePkgInstallationFileNotFoundException:
                # Do not leave a half-installed package behind.
                if os.path.exists(install_path):
                    shutil.rmtree(install_path)

                raise

        else:
            # Otherwise, put all files under package dir.
            # **Note** Here we must make sure <install_path> does not exist.
            # Otherwise, <unzipdir> will be inside <install_path>
            shutil.move(uncompressdir, install_path)

        # Update bash completion list.

        utils.update_command_completion(
            set(utils.load_description(model)['commands']))

        # Update working dir if any.

        if args.workding_dir is not None:
            utils.update_working_dir(model, args.workding_dir)

        if not args.quiet:

            # Informative message about the size of the installed model.

            print("Installed '{}' into '{}' ({:,} bytes).".format(
                model, install_path, utils.dir_size(install_path)))

            # Suggest next step. README or DOWNLOAD

            utils.print_next_step('install', model=model)
예제 #4
0
def configure_model(args):
    """Configure mlhub itself, or the dependencies of an installed model.

    A browser-like ``User-agent`` opener is installed for ``urllib``
    first.  Without ``args.model`` the bundled ``scripts/dep/mlhub.sh`` is
    run (only on Debian/Ubuntu) and the function returns.  With a model,
    each dependency group from the model's description is passed to the
    matching ``utils.install_*`` helper, then ``configure.sh``,
    ``configure.R`` and ``configure.py`` are each handed to
    ``utils.configure``.

    Args:
        args: Parsed command line namespace; ``y``, ``yes``, ``model``,
            ``i``, ``quiet`` and ``working_dir`` are read.

    Raises:
        utils.ConfigureFailedException: The mlhub set-up script exited
            with a non-zero status; carries the script's decoded stderr.
    """

    # TODO: Support additional configuration beyond what MLHUB.yaml lists.
    # TODO: On failure, report the failed dependency together with the
    #       installed and the not-yet-installed ones.
    # TODO: Support specifying package versions.
    # TODO: More informative messages per kind of dependency.
    #
    # Design notes kept from the original author:
    #
    # 1 Construct an mlhub container (from Ubuntu) as a known starting point.
    # 2 Assume the user is on a DSVM with a free Azure account to test out.
    # 3 Read `dependencies:` and `language:` and install as required.
    # 4 A packager-supplied configure.R could override everything, but the
    #   preference is to list R/python packages under `dependencies:`.
    #
    # Sketch:
    #
    #   if file.exists(configure.XX):
    #     XX configure.XX
    #   else if language: == "Rscript":
    #     install whichever of `dependencies:` is not in installed.packages()
    #   else if language: == "python":
    #     write `dependencies:` to requirements.txt
    #     pip install -r requirements.txt

    assume_yes = args.y | args.yes

    # Some sites answer 403 when the request header is empty or names
    # python urllib, taking the client for a robot.  Sending a header
    # keeps e.g. gitlab happy.  TODO move to using Requests.

    url_opener = urllib.request.build_opener()
    url_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    urllib.request.install_opener(url_opener)

    if not args.model:

        # No model named: configure MLHUB per se (bash completion and
        # system pre-requisites).

        if distro.id() in ["debian", "ubuntu"]:
            exports = []
            if assume_yes:
                exports.append("export _MLHUB_OPTION_YES='y'; ")
            exports.append(
                'export _MLHUB_PYTHON_EXE="{}"; '.format(sys.executable))
            setup_script = os.path.join("scripts", "dep", "mlhub.sh")
            proc = subprocess.Popen(
                "{}{} {}".format("".join(exports), BASH_CMD, setup_script),
                shell=True,
                cwd=os.path.dirname(__file__),
                stderr=subprocess.PIPE,
            )
            _, stderr_bytes = proc.communicate()
            if proc.returncode != 0:
                raise utils.ConfigureFailedException(
                    stderr_bytes.decode("utf-8"))

        return

    # ----- Model package configuration -----

    # Correct model name if possible.

    model = args.model
    corrected = utils.get_misspelled_pkg(model)
    if corrected is not None:
        model = corrected

    pkg_dir = utils.get_package_dir(model)

    # The model package must already be installed.

    utils.check_model_installed(model)

    # Locate the dependency spec: top level first, then under 'meta'.

    entry = utils.load_description(model)
    if "dependencies" in entry:
        depspec = entry["dependencies"]
    elif "dependencies" in entry["meta"]:
        depspec = entry["meta"]["dependencies"]
    else:
        depspec = None

    if depspec is not None:
        for spec in utils.flatten_mlhubyaml_deps(depspec):
            category, deplist = spec[0][-1], spec[1]

            # Category -> action
            #
            #   None             by entry['meta']['languages']:
            #                      R      -> install.packages(xxx) from cran
            #                      Python -> pip install xxx
            #   system, sh       apt-get install
            #   r, cran          install.packages(xxx) from cran (version ok)
            #   cran-2018-12-01  install cran snapshot on 2018-12-01
            #   github           devtools::install_github
            #   python           apt-get install python-xxx
            #   python3          apt-get install python3-xxx
            #   pip / pip3       pip install / pip3 install
            #   conda            conda install
            #   files            download files

            # Uncategorised: decide by the package language.
            if category is None:
                lang = entry["meta"]["languages"].lower()
                if lang == "r":
                    utils.install_r_deps(
                        deplist, model, source="cran", yes=assume_yes)
                elif "python".startswith(lang):
                    utils.install_python_deps(
                        deplist, model, source="pip", yes=assume_yes)
                continue

            # System deps (any prefix of 'shell', e.g. 'sh', also counts).
            if category == "system" or "shell".startswith(category):
                utils.install_system_deps(deplist, yes=assume_yes)
                continue

            # R deps.
            if category == "r":
                utils.install_r_deps(
                    deplist, model, source="cran", yes=assume_yes)
                continue

            if category in ("cran", "github") or category.startswith("cran-"):
                utils.install_r_deps(
                    deplist, model, source=category, yes=assume_yes)
                continue

            # Python deps.
            if category.startswith(("python", "pip")) or category == "conda":
                utils.install_python_deps(
                    deplist, model, source=category, yes=assume_yes)
                continue

            # Files.
            if "files".startswith(category):
                utils.install_file_deps(
                    deplist, model, key=args.i, yes=assume_yes)

    # Run additional configure scripts if any.
    #
    # NOTE(review): the first result is or-ed with True, so the flag is
    # always truthy and the "no configuration" report below is currently
    # never printed.  Kept as is; confirm whether that is intended.

    configured = utils.configure(pkg_dir, "configure.sh", args.quiet) or True
    for script_name in ("configure.R", "configure.py"):
        configured = (
            utils.configure(pkg_dir, script_name, args.quiet) or configured
        )

    if not configured:
        if depspec is None:
            print("No configuration provided (maybe none is required).")
        else:
            msg = (
                "No configuration script provided for this model. "
                "The following dependencies are required:\n"
            )
            print(msg)
            print(yaml.dump(depspec))

    # Remember the working dir if one was given.

    if args.working_dir is not None:
        utils.update_working_dir(model, args.working_dir)

    # Suggest next step.

    if not args.quiet:
        utils.print_next_step("configure", model=model)
예제 #5
0
def install_model(args):
    """Install a model.

    Resolve ``args.model`` to a package archive or a repository reference,
    work out the model name and version, ask for confirmation when the model
    is already installed, and finally place the package files under the
    model's package directory.

    Args:
        args: Command line args parsed by argparse.
        args.model (str): mlm/zip path, mlm/zip url, model name, GitHub repo,
                          like mlhubber/mlhub, or MLHUB.yaml on github repo,
                          like mlhubber/audit:doc/MLHUB.yaml.
        args.mlhub: Passed to utils.get_model_info_from_repo() when a bare
                    model name has to be looked up.
        args.i: SSH key used when cloning a (maybe) private repo.
        args.quiet: When true, progress and next-step messages are suppressed.
        args.working_dir: Working dir recorded for the model when not None.

    Raises:
        utils.InstallFailedException: If cloning or checking out a (maybe)
            private repo exits with a non-zero status.
        utils.ModelPkgInstallationFileNotFoundException: Re-raised from
            utils.install_file_deps() after the partially created install
            dir has been removed.
        SystemExit: With status 0 if the user declines to replace an
            already installed version.
    """

    logger = logging.getLogger(__name__)
    logger.info("Install a model.")
    logger.debug(f"args: {args}")

    # Avoid 403 errors which result when the header identifies itself
    # as python urllib or is empty and thus the web site assumes it is
    # a robot. We are not a robot but a user downloading a file. This
    # will ensure gitlab is okay with retrieving from a URL by adding
    # a header rather than no header. TODO move to using Requests.

    opener = urllib.request.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    urllib.request.install_opener(opener)

    model = args.model  # model pkg name
    location = args.model  # pkg file path or URL
    key = args.i  # SSH key
    version = None  # model pkg version
    mlhubyaml = None  # MLHUB.yaml path or URL
    repo_obj = None  # RepoTypeURL object for related URL interpretation
    maybe_private = False  # Maybe private repo

    # Obtain the model URL if not a local file.

    if (
        not utils.is_archive_file(model)
        and not utils.is_url(model)
        and "/" not in model
    ):

        # Model package name, which can be found in mlhub repo.
        # Like:
        #     $ ml install audit
        #
        # We assume the URL got from mlhub repo is a link to a mlm/zip/tar file
        # or a GitHub repo reference or MLHUB.yaml.

        # Correct model name if possible.

        matched_model = utils.get_misspelled_pkg(model)
        if matched_model is not None:
            model = matched_model

        # Get model pkg meta data from mlhub repo.

        location, version, meta_list = utils.get_model_info_from_repo(
            model, args.mlhub
        )

        # Update bash completion list.

        utils.update_model_completion({e["meta"]["name"] for e in meta_list})

    if not utils.is_archive_file(location):

        # Model from a repo such as GitHub, GitLab, Bitbucket etc.
        #
        # Possible options are:
        #   $ ml install mlhubber/audit            # latest commit on the master branch of GitHub repo mlhubber/audit
        #   $ ml install mlhubber/audit@dev        # latest commit on the dev branch of GitHub repo mlhubber/audit
        #   $ ml install mlhubber/audit@0001ea4    # commit 0001ea4 of mlhubber/audit
        #   $ ml install mlhubber/audit:doc/MLHUB.yaml            # latest commit on master, but a specified YAML file
        #   $ ml install https://github.com/mlhubber/audit/...    # Arbitrary GitHub link address
        #
        #   $ ml install github:mlhubber/audit    # GitHub repo, the same as ml install mlhubber/audit
        #
        #   $ ml install gitlab:mlhubber/audit@2fe89kh:doc/MLHUB.yaml    # GitLab repo
        #   $ ml install https://gitlab.com/mlhubber/audit/...           # GitLab repo
        #
        #   $ ml install bitbucket:mlhubber/audit                        # BitBucket repo
        #   $ ml install https://bitbucket.org/mlhubber/audit/...        # BitBucket repo

        repo_obj = utils.RepoTypeURL.get_repo_obj(location)
        try:
            mlhubyaml = repo_obj.get_pkg_yaml_url()
            location = repo_obj.compose_repo_zip_url()
            logger.debug(f"location: {location}")
            logger.debug(f"mlhubyaml: {mlhubyaml}")
        except utils.DescriptionYAMLNotFoundException:
            # The YAML could not be found publicly: treat the repo as
            # possibly private and fall back to an SSH clone below.
            maybe_private = True

    # Determine the path of downloaded/existing model package file

    pkgfile = None
    if maybe_private:  # Maybe private repo
        pkgfile = repo_obj.repo
    elif utils.is_archive_file(location):
        pkgfile = os.path.basename(location)  # pkg file name
    elif utils.is_url(location):
        pkgfile = utils.get_url_filename(location)

    # Query archive type if not available from file name per se.

    if not maybe_private:
        while pkgfile is None or not utils.is_archive_file(pkgfile):
            print(
                "The file type cannot be determined.\n"
                "Please give it a file name with explicit valid archive extension: ",
                end="",
            )
            pkgfile = input()

    # Name of the dir where the pkg file is extracted: the repo name for a
    # cloned repo, otherwise the file name without its last extension.

    if maybe_private:
        uncompressdir = pkgfile
    else:
        uncompressdir = pkgfile[: pkgfile.rfind(".")]

    # Installation.

    entry = None  # Meta info read from MLHUB.yaml
    with tempfile.TemporaryDirectory() as mlhubtmpdir:

        # Determine the local path of the model package

        if maybe_private:
            local = None
        elif utils.is_url(location):
            local = os.path.join(mlhubtmpdir, pkgfile)  # downloaded
        else:
            local = location  # local file path

        uncompressdir = os.path.join(mlhubtmpdir, uncompressdir)

        # Obtain model version.

        if version is None:
            if utils.ends_with_mlm(pkgfile):

                # Get version number from MLM file name.

                model, version = utils.interpret_mlm_name(pkgfile)

            elif not repo_obj:

                # Get MLHUB.yaml inside the archive file.

                if utils.is_url(location):

                    # Download the package file because it is not from GitHub.

                    utils.download_model_pkg(
                        location, local, pkgfile, args.quiet
                    )

                if not args.quiet:
                    print("Extracting '{}' ...\n".format(pkgfile))

                utils.unpack_with_promote(
                    local, uncompressdir, valid_name=pkgfile
                )
                mlhubyaml = utils.get_available_pkgyaml(
                    uncompressdir
                )  # Path to MLHUB.yaml

            elif maybe_private:

                # Clone over SSH into the temp dir (the clone lands in
                # <mlhubtmpdir>/<repo>, i.e. uncompressdir) and check out
                # the requested ref.

                identity_env = (
                    "GIT_SSH_COMMAND='ssh -i {}' ".format(key) if key else ""
                )
                command = "cd {}; {}git clone {}; cd {}; git checkout {}".format(
                    mlhubtmpdir,
                    identity_env,
                    repo_obj.get_ssh_clone_url(),
                    repo_obj.repo,
                    repo_obj.ref,
                )
                proc = subprocess.Popen(
                    command, shell=True, stderr=subprocess.PIPE
                )
                _, errors = proc.communicate()
                if proc.returncode != 0:
                    raise utils.InstallFailedException(errors.decode("utf-8"))

                if repo_obj.path:
                    mlhubyaml = os.path.join(uncompressdir, repo_obj.path)
                else:
                    mlhubyaml = utils.get_available_pkgyaml(
                        uncompressdir
                    )  # Path to MLHUB.yaml

            if mlhubyaml is not None:  # Get version number from MLHUB.yaml
                entry = utils.read_mlhubyaml(mlhubyaml)
                meta = entry["meta"]
                model = meta["name"]
                version = meta["version"]

            utils.update_model_completion(
                {model}
            )  # Update bash completion list.

        # Check if model is already installed.

        install_path = utils.get_package_dir(model)  # Installation path
        if os.path.exists(install_path):

            # Ensure version numbers are strings before comparing.

            installed_version = str(
                utils.load_description(model)["meta"]["version"]
            )
            version = str(version)

            installed_sv = StrictVersion(installed_version)
            candidate_sv = StrictVersion(version)

            if installed_sv > candidate_sv:
                question = "Downgrade '{}' from version '{}' to version '{}'"
            elif installed_sv == candidate_sv:
                question = "Replace '{}' version '{}' with version '{}'"
            else:
                question = "Upgrade '{}' from version '{}' to version '{}'"

            yes = utils.yes_or_no(
                question, model, installed_version, version, yes=True
            )

            if not yes:
                # Suggest next step before exiting, as if an install has just happened.
                utils.print_next_step("install", model=model)
                sys.exit(0)
            else:
                print()

            shutil.rmtree(install_path)

        # Uncompress package file.

        if not os.path.exists(uncompressdir):

            # Model pkg mlm or GitHub pkg has not been unzipped yet.

            if utils.is_url(location):  # Download the package file if needed.
                utils.download_model_pkg(location, local, pkgfile, args.quiet)

            if not args.quiet:
                print("Extracting '{}' ...\n".format(pkgfile))

            utils.unpack_with_promote(local, uncompressdir, valid_name=pkgfile)

        # Install package files.
        #
        # Because it is time-consuming to download all package files one-by-one, we
        # download the whole zipball from the repo first, then re-arrange the files
        # according to `dependencies` -> `files` in MLHUB.yaml if any.

        # Make sure the package description has been read.
        #
        # It is still missing when the version came from the MLM file name
        # (no YAML located yet) or from the mlhub repo index (YAML URL known
        # from the repo reference but never read).  Previously only the
        # first case was handled, so the second crashed on `in entry` below.

        if entry is None:
            if mlhubyaml is None:
                mlhubyaml = utils.get_available_pkgyaml(uncompressdir)
            entry = utils.read_mlhubyaml(mlhubyaml)

        # Find if any files specified in MLHUB.yaml

        depspec = None
        if "dependencies" in entry:
            depspec = entry["dependencies"]
        elif "dependencies" in entry["meta"]:
            depspec = entry["meta"]["dependencies"]

        file_spec = None
        if depspec is not None and "files" in depspec:
            file_spec = {"files": depspec["files"]}
        elif "files" in entry:
            file_spec = {"files": entry["files"]}

        if file_spec is not None:

            # Install package files as they are specified in MLHUB.yaml.
            #
            # MLHUB.yaml should always be at the package root.
            # makedirs (rather than mkdir) so a first-ever install also
            # works when the parent directory does not exist yet; it still
            # raises if install_path itself already exists.

            os.makedirs(install_path)
            yaml_target = os.path.join(install_path, MLHUB_YAML)
            if utils.is_url(mlhubyaml):

                # We currently only support MLHUB.yaml specified on GitHub.

                if mlhubyaml.startswith("https://api"):
                    # API URL: the JSON answer carries the raw download URL.
                    api_reply = json.loads(
                        urllib.request.urlopen(mlhubyaml).read()
                    )
                    urllib.request.urlretrieve(
                        api_reply["download_url"], yaml_target
                    )
                else:
                    urllib.request.urlretrieve(mlhubyaml, yaml_target)
            else:
                shutil.move(mlhubyaml, install_path)

            # All package files except MLHUB.yaml should be specified in 'files' of MLHUB.yaml

            try:
                utils.install_file_deps(
                    utils.flatten_mlhubyaml_deps(file_spec)[0][1],
                    model,
                    downloadir=uncompressdir,
                    yes=True,
                )
            except utils.ModelPkgInstallationFileNotFoundException:
                # Do not leave a half-installed package behind.
                if os.path.exists(install_path):
                    shutil.rmtree(install_path)

                raise

        else:
            # Otherwise, put all files under package dir.
            # **Note** Here we must make sure <install_path> does not exist.
            # Otherwise, <unzipdir> will be inside <install_path>
            shutil.move(uncompressdir, install_path)

        # Update bash completion list.

        utils.update_command_completion(
            set(utils.load_description(model)["commands"])
        )

        # Update working dir if any.

        if args.working_dir is not None:
            utils.update_working_dir(model, args.working_dir)

        if not args.quiet:

            # Informative message about the size of the installed model.

            msg = f"Found '{model}' version {version}.\n\nInstalled '{model}' "
            msg += f"into '{install_path}/' ({utils.dir_size(install_path):,} bytes)."
            print(msg)

            # Suggest next step. README or DOWNLOAD

            utils.print_next_step("install", model=model)
예제 #6
0
def dispatch(args):
    """Dispatch other commands to the appropriate model provided script.

    Looks up ``<cmd>.<lang>`` in the model's package dir, builds a shell
    command line that exports a few ``_MLHUB_*`` environment variables and
    runs the script with the matching interpreter, optionally inside the
    model's conda environment and/or from the model's working dir.

    Args:
        args: Command line args parsed by argparse.
        args.cmd (str): Name of the model command to run.
        args.model (str): Name of the installed model.
        args.param (list): Extra parameters, joined with spaces and appended
            to the script invocation.
        args.working_dir: Working dir to run the command in.  ``None`` means
            "use the one saved for the model, if any"; an empty string is
            passed to utils.update_working_dir() and then treated as
            "no working dir".
        args.quiet: When true, the next-step hint is suppressed.

    Raises:
        utils.CommandNotFoundException: If the model declares no commands,
            or the command or its script file cannot be found.
        utils.LackDependencyException: If the script's stderr reports a
            missing Python module or R package.
        utils.DataResourceNotFoundException: If the script's stderr reports
            that exception.
        SystemExit: With status 1 if a graphic display is required but not
            available and the user chooses not to continue.
    """

    cmd = args.cmd
    model = args.model
    path = utils.get_package_dir(model)

    param = " ".join(args.param)

    # Get working dir if any.
    #
    # The option used to be read through the misspelled attribute
    # `workding_dir` while the saved value was stored into `working_dir`,
    # so the saved working dir was never applied.  Resolve it once into a
    # local; the misspelled attribute is still honoured as a fallback in
    # case a parser defines it under that name.

    working_dir = getattr(args, "working_dir", None)
    if working_dir is None:
        working_dir = getattr(args, "workding_dir", None)

    if working_dir is not None:
        utils.update_working_dir(model, working_dir)
        if working_dir == '':
            working_dir = None
    else:
        # Fall back to the working dir saved for the model; an empty value
        # is treated like "none saved" so it is never used as cwd.
        working_dir = utils.get_working_dir(model) or None

    # Keep the resolved value visible on args, as before.

    args.working_dir = working_dir

    # Get conda environment name if any.

    conda_env_name = utils.get_conda_env_name(model)

    # Check that the model is installed and has commands.

    utils.check_model_installed(model)

    entry = utils.load_description(model)

    if 'commands' not in entry or not entry['commands']:
        raise utils.CommandNotFoundException(cmd, model)

    # Correct misspelled command if possible.

    matched_cmd = utils.get_misspelled_command(cmd, list(entry['commands']))
    if matched_cmd is not None:
        cmd = matched_cmd

    # Check if cmd needs to use graphic display indicated in DESCRIPTION.yaml.

    meta = entry['meta']
    display_missing = os.environ.get('DISPLAY', '') == ''
    if 'display' in meta and cmd in meta['display'] and display_missing:
        msg = "Graphic display is required but not available for command '{}'. Continue"
        yes = utils.yes_or_no(msg, cmd, yes=False)
        if not yes:
            msg = """
To enable DISPLAY be sure to connect to the server using 'ssh -X'
or else connect to the server's desktop using a local X server like X2Go.

"""
            sys.stdout.write(msg)
            sys.exit(1)

    # Obtain the default/chosen language for the package.

    lang = meta["languages"]

    # Deal with malformed 'languages' field: a value that is a substring
    # of a known language name is mapped to that language's file extension.

    lang_opts = {"python": "py", "R": "R"}
    for name, extension in lang_opts.items():
        if lang in name:
            lang = extension
            break

    # Obtain the specified script file.

    script = cmd + "." + lang

    logger = logging.getLogger(__name__)
    logger.debug("Execute the script: " + os.path.join(path, script))

    if cmd not in entry['commands'] or not os.path.exists(
            os.path.join(path, script)):
        raise utils.CommandNotFoundException(cmd, model)

    # Determine the interpreter to use
    #
    # .R => Rscript; .py => python, etc.

    interpreter = utils.interpreter(script)

    # Change working dir if needed: the script is then addressed by its
    # full path and the process runs from the working dir instead.

    if working_dir is not None:
        script = os.path.join(path, script)
        path = working_dir

    # Handle python environment

    python_pkg_bin = None
    python_pkg_path = None
    if script.endswith('py'):
        python_pkg_base = os.path.sep.join(
            [utils.get_package_dir(model), '.python'])
        python_pkg_path = python_pkg_base + site.USER_SITE
        python_pkg_bin = python_pkg_base + site.USER_BASE + '/bin'

        # TODO: Make sure to document:
        #     $ sudo apt-get install -y python3-pip
        #     $ /usr/bin/pip3 install mlhub
        #   Since in DSVM, the default pip is conda's pip, so if we stick to
        #   use system's command, then the installation of MLHub itself should
        #   be completed via system's pip, otherwise, MLHub will not work.

        if sys.executable != SYS_PYTHON_CMD:
            python_pkg_path = python_pkg_base + site.getsitepackages()[0]
            python_pkg_bin = python_pkg_base + site.PREFIXES[0] + '/bin'
            if utils.get_sys_python_pkg_usage(model):
                utils.print_on_stderr(MSG_INCOMPATIBLE_PYTHON_ENV, model)

    # _MLHUB_CMD_CWD: a environment variable indicates current working
    #                 directory where command `ml xxx` is invoked.
    # _MLHUB_MODEL_NAME: env variable indicates the name of the model.
    #
    # The above two env vars can be obtained by helper function, such
    # as utils.get_cmd_cwd().  And model package developer should be
    # use the helper function instead of the env vars directly.

    env_var = "export _MLHUB_CMD_CWD='{}'; ".format(os.getcwd())
    env_var += "export _MLHUB_MODEL_NAME='{}'; ".format(model)
    env_var += 'export _MLHUB_PYTHON_EXE="{}"; '.format(sys.executable)
    if python_pkg_path:
        env_var += "export PYTHONPATH='{}'; ".format(python_pkg_path)
    if python_pkg_bin:
        env_var += "export PATH=\"{}:$PATH\"; ".format(python_pkg_bin)

    command = "{}{} {} {}".format(env_var, interpreter, script, param)

    # Run script inside conda environment if specified

    if conda_env_name is not None:
        command = '{} -c "source activate {}; {}"'.format(
            BASH_CMD, conda_env_name, command)

    logger.debug("(cd " + path + "; " + command + ")")

    # Only stderr is captured; the script's stdout goes straight through.

    proc = subprocess.Popen(command,
                            shell=True,
                            cwd=path,
                            stderr=subprocess.PIPE)
    _, errors = proc.communicate()
    missing_r_dep = False
    if proc.returncode != 0:
        errors = errors.decode("utf-8")

        # Check if it is Python dependency unsatisfied

        dep_required = re.search(
            r"ModuleNotFoundError: No module named '(.*)'", errors)

        # Check if R dependency unsatisfied

        if dep_required is None:
            dep_required = re.search(
                r"there is no package called ‘(.*)’", errors)
            if dep_required is not None:
                missing_r_dep = True

        # Check if required data resource not found

        data_required = re.search(
            r"mlhub.utils.DataResourceNotFoundException", errors)

        if dep_required is not None:  # Dependency unsatisfied
            dep_required = dep_required.group(1)
            logger.error("Dependency unsatisfied: {}\n{}".format(
                dep_required, errors))
            raise utils.LackDependencyException(dep_required, missing_r_dep,
                                                model)
        elif data_required is not None:  # Data not found
            raise utils.DataResourceNotFoundException()
        else:  # Other unknown errors
            print("An error was encountered:\n")
            print(errors)

    else:
        # Suggest next step

        if not args.quiet:
            utils.print_next_step(cmd, description=entry, model=model)