Esempio n. 1
0
    def test_no_custom_modules(self, deployable_entity, model_for_deployment):
        deployable_entity.log_model(model_for_deployment['model'])

        custom_module_filenames = {"__init__.py", "_verta_config.py"}
        for path in sys.path:
            # skip std libs and venvs
            #     This logic is from verta.client._log_modules().
            lib_python_str = os.path.join(os.sep, "lib", "python")
            i = path.find(lib_python_str)
            if i != -1 and glob.glob(os.path.join(path[:i], "bin", "python*")):
                continue

            for parent_dir, dirnames, filenames in os.walk(path):
                # only Python files
                filenames[:] = [
                    filename for filename in filenames
                    if filename.endswith(('.py', '.pyc', '.pyo'))
                ]

                if not _utils.is_in_venv(path) and _utils.is_in_venv(
                        parent_dir):
                    continue
                custom_module_filenames.update(map(os.path.basename,
                                                   filenames))

        custom_modules = deployable_entity.get_artifact(
            _artifact_utils.CUSTOM_MODULES_KEY)
        with zipfile.ZipFile(custom_modules, 'r') as zipf:
            assert custom_module_filenames == set(
                map(os.path.basename, zipf.namelist()))
Esempio n. 2
0
    def test_log_model(self, deployable_entity):
        np = pytest.importorskip("numpy")
        sklearn = pytest.importorskip("sklearn")
        from sklearn.linear_model import LogisticRegression

        classifier = LogisticRegression()
        classifier.fit(np.random.random((36, 12)), np.random.random(36).round())
        original_coef = classifier.coef_
        deployable_entity.log_model(classifier)

        # retrieve the classifier:
        retrieved_classfier = deployable_entity.get_model()
        assert np.array_equal(retrieved_classfier.coef_, original_coef)

        # check model api:
        assert _artifact_utils.MODEL_API_KEY in deployable_entity.get_artifact_keys()
        for artifact in deployable_entity._msg.artifacts:
            if artifact.key == _artifact_utils.MODEL_API_KEY:
                assert artifact.filename_extension == "json"

        # overwrite should work:
        new_classifier = LogisticRegression()
        new_classifier.fit(np.random.random((36, 12)), np.random.random(36).round())
        deployable_entity.log_model(new_classifier, overwrite=True)
        retrieved_classfier = deployable_entity.get_model()
        assert np.array_equal(retrieved_classfier.coef_, new_classifier.coef_)

        # when overwrite = false, overwriting should fail
        with pytest.raises(ValueError) as excinfo:
            deployable_entity.log_model(new_classifier)

        assert "already exists" in str(excinfo.value)

        # Check custom modules:
        custom_module_filenames = {"__init__.py", "_verta_config.py"}
        for path in sys.path:
            # skip std libs and venvs
            #     This logic is from verta.client._log_modules().
            lib_python_str = os.path.join(os.sep, "lib", "python")
            i = path.find(lib_python_str)
            if i != -1 and glob.glob(os.path.join(path[:i], "bin", "python*")):
                continue

            for parent_dir, dirnames, filenames in os.walk(path):
                # only Python files
                filenames[:] = [
                    filename
                    for filename in filenames
                    if filename.endswith((".py", ".pyc", ".pyo"))
                ]

                if not _utils.is_in_venv(path) and _utils.is_in_venv(parent_dir):
                    continue
                custom_module_filenames.update(map(os.path.basename, filenames))

        custom_modules = deployable_entity.get_artifact(_artifact_utils.CUSTOM_MODULES_KEY)
        with zipfile.ZipFile(custom_modules, "r") as zipf:
            assert custom_module_filenames == set(
                map(os.path.basename, zipf.namelist())
            )
    def _custom_modules_as_artifact(self, paths=None):
        if isinstance(paths, six.string_types):
            paths = [paths]

        # If we include a path that is actually a module, then we _must_ add its parent to the
        # adjusted sys.path in the end so that we can re-import with the same name.
        forced_local_sys_paths = []
        if paths is not None:
            new_paths = []
            for p in paths:
                abspath = os.path.abspath(os.path.expanduser(p))
                if os.path.exists(abspath):
                    new_paths.append(abspath)
                else:
                    try:
                        mod = importlib.import_module(p)
                        new_paths.extend(mod.__path__)
                        forced_local_sys_paths.extend(map(os.path.dirname, mod.__path__))
                    except ImportError:
                        raise ValueError("custom module {} does not correspond to an existing folder or module".format(p))

            paths = new_paths

        forced_local_sys_paths = sorted(list(set(forced_local_sys_paths)))

        # collect local sys paths
        local_sys_paths = copy.copy(sys.path)
        ## replace empty first element with cwd
        ##     https://docs.python.org/3/library/sys.html#sys.path
        if local_sys_paths[0] == "":
            local_sys_paths[0] = os.getcwd()
        ## convert to absolute paths
        local_sys_paths = list(map(os.path.abspath, local_sys_paths))
        ## remove paths that don't exist
        local_sys_paths = list(filter(os.path.exists, local_sys_paths))
        ## remove .ipython
        local_sys_paths = list(filter(lambda path: not path.endswith(".ipython"), local_sys_paths))
        ## remove virtual (and real) environments
        local_sys_paths = list(filter(lambda path: not _utils.is_in_venv(path), local_sys_paths))

        # get paths to files within
        if paths is None:
            # Python files within filtered sys.path dirs
            paths = local_sys_paths
            extensions = ['py', 'pyc', 'pyo']
        else:
            # all user-specified files
            paths = paths
            extensions = None
        local_filepaths = _utils.find_filepaths(
            paths, extensions=extensions,
            include_hidden=True,
            include_venv=False,  # ignore virtual environments nested within
        )
        ## remove .git
        local_filepaths = set(filter(lambda path: not path.endswith(".git") and ".git/" not in path,
                                      local_filepaths))

        # obtain deepest common directory
        #     This directory on the local system will be mirrored in `_CUSTOM_MODULES_DIR` in
        #     deployment.
        curr_dir = os.path.join(os.getcwd(), "")
        paths_plus = list(local_filepaths) + [curr_dir]
        common_prefix = os.path.commonprefix(paths_plus)
        common_dir = os.path.dirname(common_prefix)

        # replace `common_dir` with `_CUSTOM_MODULES_DIR` for deployment sys.path
        depl_sys_paths = list(map(lambda path: os.path.relpath(path, common_dir), local_sys_paths + forced_local_sys_paths))
        depl_sys_paths = list(map(lambda path: os.path.join(_CUSTOM_MODULES_DIR, path), depl_sys_paths))

        bytestream = six.BytesIO()
        with zipfile.ZipFile(bytestream, 'w') as zipf:
            for filepath in local_filepaths:
                arcname = os.path.relpath(filepath, common_dir)  # filepath relative to archive root
                try:
                    zipf.write(filepath, arcname)
                except:
                    # maybe file has corrupt metadata; try reading then writing contents
                    with open(filepath, 'rb') as f:
                        zipf.writestr(
                            _artifact_utils.global_read_zipinfo(arcname),
                            f.read(),
                        )

            # add verta config file for sys.path and chdir
            working_dir = os.path.join(_CUSTOM_MODULES_DIR, os.path.relpath(curr_dir, common_dir))
            zipf.writestr(
                _artifact_utils.global_read_zipinfo("_verta_config.py"),
                six.ensure_binary('\n'.join([
                    "import os, sys",
                    "",
                    "",
                    "sys.path = sys.path[:1] + {} + sys.path[1:]".format(depl_sys_paths),
                    "",
                    "try:",
                    "    os.makedirs(\"{}\")".format(working_dir),
                    "except OSError:  # already exists",
                    "    pass",
                    "os.chdir(\"{}\")".format(working_dir),
                ]))
            )

            # add __init__.py
            init_filename = "__init__.py"
            if init_filename not in zipf.namelist():
                zipf.writestr(
                    _artifact_utils.global_read_zipinfo(init_filename),
                    b"",
                )

        bytestream.seek(0)

        return bytestream
Esempio n. 4
0
 def test_is_not_in_venv(self):
     # NOTE: assumes there isn't a venv in the client test dir
     for root, _, files in os.walk("."):
         for filename in files:
             abs_filepath = os.path.abspath(os.path.join(root, filename))
             assert not _utils.is_in_venv(abs_filepath)
Esempio n. 5
0
 def test_is_in_venv(self, prefix, filepath, in_fake_venv):
     abs_filepath = os.path.abspath(os.path.join(prefix, filepath))
     assert _utils.is_in_venv(abs_filepath)