Esempio n. 1
0
def test_get_editable_requirements():
    """Check editable requirements discovered from an explicit directory.

    ``_running_from_pex`` is patched to return ``True``. The editable
    package is copied into a temporary directory, which is then passed as
    ``editable_packages_dir``; the result must map the package's base name
    to its original path.
    """
    pex_patch = mock.patch(f"{MODULE_TO_TEST}._running_from_pex",
                           return_value=True)
    with pex_patch, tempfile.TemporaryDirectory() as workdir:
        package_path = _get_editable_package_name()
        package_name = os.path.basename(package_path)

        # Lay out the temporary directory before querying it.
        _create_editable_files(workdir, package_name)
        shutil.copytree(package_path, f"{workdir}/{package_name}")

        found = packaging.get_editable_requirements(
            editable_packages_dir=workdir)
        assert found == {package_name: package_path}
Esempio n. 2
0
    def run(self):
        """Run training, either through ``tf_yarn`` or in this process.

        When ``self.train_on_yarn`` is falsy, the trainer built from
        ``self.trainer`` is run directly. Otherwise the PEX environment(s)
        are uploaded, the experiment is submitted with
        ``tf_yarn.run_on_yarn``, and afterwards the exporters and the final
        evaluation are run from this process.
        """
        if not self.train_on_yarn:
            LOGGER.info("Not training on yarn.")
            from_config(self.trainer).run()
            return

        # Upload environment(s) to HDFS (CPU and / or GPU environments)
        env_archives = {tf_yarn.NodeLabel.CPU: self.config.upload_pex_cpu()}
        if self.config.tf_yarn == "gpu":
            env_archives[tf_yarn.NodeLabel.GPU] = self.config.upload_pex_gpu()

        def _experiment_fn():
            # Remove auto-termination of active MLFlow runs from
            # inside the chief / evaluator
            atexit.unregister(mlflow.end_run)
            remote_trainer = from_config(self.trainer)
            return remote_trainer.create_experiment()

        open_acls = skein.model.ACLs(enable=True,
                                     ui_users=["*"],
                                     view_users=["*"])
        tf_yarn.run_on_yarn(
            acls=open_acls,
            env=self.config.get_env_vars(),
            experiment_fn=_experiment_fn,
            files=get_editable_requirements(),
            name=self.config.name,
            nb_retries=self.config.nb_retries,
            pre_script_hook=self.config.pre_script_hook,
            pyenv_zip_path=env_archives,
            queue=self.config.queue,
            task_specs=self.config.get_task_specs(),
        )

        # Run exporters and final evaluation
        trainer = from_config(self.trainer)
        experiment = trainer.create_experiment()
        for export in trainer.exporters:
            export(experiment.estimator)
        trainer.run_final_evaluation()
Esempio n. 3
0
def _upload_env_from_venv(package_path: str,
                          packer: packaging.Packer = packaging.PEX_PACKER,
                          additional_packages: Dict[str, str] = {},
                          ignored_packages: Collection[str] = [],
                          resolved_fs: Any = None,
                          force_upload: bool = False,
                          include_editable: bool = False) -> None:
    """Pack the current environment and upload it to ``package_path``.

    The non-editable requirements are collected, adjusted by
    ``_handle_packages`` and formatted. Packing and uploading only happen
    when ``force_upload`` is true or ``_is_archive_up_to_date`` reports the
    archive as stale; otherwise a message is logged and nothing is
    uploaded.

    Args:
        package_path: Destination path of the archive on ``resolved_fs``.
        packer: Packer used to build the archive.
        additional_packages: Forwarded to ``_handle_packages`` and to
            ``packer.pack``.
        ignored_packages: Forwarded to ``_handle_packages`` and to
            ``packer.pack``.
        resolved_fs: Filesystem object; ``exists``, ``mkdir`` and ``put``
            are called on it when an upload happens.
        force_upload: Upload even when the archive looks up to date.
        include_editable: Also hand the editable requirements to the packer.

    NOTE(review): the ``{}`` / ``[]`` defaults are objects shared across
    calls. This function only forwards them; confirm the callees never
    mutate them.
    """
    current_packages = packaging.get_non_editable_requirements()
    _handle_packages(current_packages, additional_packages, ignored_packages)
    reqs = packaging.format_requirements(current_packages)

    _logger.debug(f"Packaging current_packages={reqs}")

    # Nothing to do when the archive is current and no upload is forced.
    if not force_upload and _is_archive_up_to_date(package_path, reqs,
                                                   resolved_fs):
        _logger.info(f"{package_path} already exists")
        return

    _logger.info(f"Zipping and uploading your env to {package_path}")

    editable_requirements = (packaging.get_editable_requirements()
                             if include_editable else {})

    with tempfile.TemporaryDirectory() as tempdir:
        local_target = f"{tempdir}/{packer.env_name()}.{packer.extension()}"
        archive_local = packer.pack(
            output=local_target,
            reqs=reqs,
            additional_packages=additional_packages,
            ignored_packages=ignored_packages,
            editable_requirements=editable_requirements)

        # Create the destination directory before uploading into it.
        remote_dir = os.path.dirname(package_path)
        if not resolved_fs.exists(remote_dir):
            resolved_fs.mkdir(remote_dir)
        resolved_fs.put(archive_local, package_path)

        _dump_archive_metadata(package_path, reqs, resolved_fs)
def _get_files(
    package_path: str,
    additional_files: Optional[List[str]] = None,
    tmp_dir: str = packaging._get_tmp_dir()
) -> Dict[str, str]:
    """Build the mapping of names to local paths for the files to upload.

    The mapping contains:

    * ``package_path`` and every entry of ``additional_files``, keyed by
      their base name;
    * one entry per editable requirement, keyed by requirement name, whose
      value is the result of ``packaging.zip_path(path, False)``;
    * ``packaging.EDITABLE_PACKAGES_INDEX``, pointing at an index file
      written into ``tmp_dir`` that lists the editable requirement names,
      one per line.

    NOTE: the default for ``tmp_dir`` is evaluated once, when this function
    is defined, not on every call.
    """
    upload_paths = ([package_path] + additional_files
                    if additional_files else [package_path])
    uploads = {os.path.basename(path): path for path in upload_paths}

    editable_requirements = packaging.get_editable_requirements()
    for name, path in editable_requirements.items():
        uploads[name] = packaging.zip_path(path, False)

    index_path = f"{tmp_dir}/{packaging.EDITABLE_PACKAGES_INDEX}"

    # Best-effort removal of a previous index; any OSError (such as the
    # file not existing) is deliberately ignored.
    try:
        os.remove(index_path)
    except OSError:
        pass

    with open(index_path, "w+") as index_file:
        index_file.writelines(
            name + "\n" for name in editable_requirements)

    uploads[packaging.EDITABLE_PACKAGES_INDEX] = index_path
    return uploads
Esempio n. 5
0
def add_editable_requirements(ssb: SparkSession.Builder):
    """Zip every editable requirement and add it to ``ssb`` as an archive.

    Each directory returned by ``packaging.get_editable_requirements()`` is
    passed through ``packaging.zip_path`` and the result is handed to
    ``_add_archive`` together with the builder.
    """
    editable_dirs = packaging.get_editable_requirements().values()
    for editable_dir in editable_dirs:
        _add_archive(ssb, packaging.zip_path(editable_dir))