def launch_remote_check(file: str) -> Tuple[bool, str]:
    """Run the Hadoop environment check in a single-instance skein application.

    The current environment is uploaded as a PEX archive through
    ``cluster_pack.upload_env``; that archive and this very file (shipped
    under the name ``check_hadoop_env.py``) are made available to the
    container, which executes the script with ``--file <file>``.

    Args:
        file: Value forwarded verbatim to the remote script's ``--file``
            option.

    Returns:
        A ``(success, app_id)`` tuple. ``success`` is ``True`` only when the
        value published under the ``result`` key of the application's
        key-value store decodes to the string ``"True"``; ``app_id`` is the
        id of the submitted skein application.
    """
    # Function-scope import: the original block only mentioned ``uuid``
    # inside a plain string, so the module may not be imported at file level.
    import uuid

    logging.info('Launching remote check')
    zip_hdfs, _ = cluster_pack.upload_env(packer=cluster_pack.PEX_PACKER)
    archive_name = os.path.basename(zip_hdfs)
    with skein.Client() as client:
        files = {
            archive_name: zip_hdfs,
            'check_hadoop_env.py': __file__,
        }
        # When tf_yarn is installed in editable mode, ship its sources as an
        # extra zip alongside the PEX archive.
        editable_packages = cluster_pack.get_editable_requirements()
        if 'tf_yarn' in editable_packages:
            tf_yarn_zip = cluster_pack.zip_path(editable_packages['tf_yarn'],
                                                False)
            logger.info(f"zip path for editable tf_yarn is {tf_yarn_zip}")
            files['tf_yarn'] = tf_yarn_zip
        service = skein.Service(
            script=f'./{archive_name} check_hadoop_env.py --file {file}',
            resources=skein.Resources(2 * 1024, 1),
            env={
                # Must be an f-string: without the prefix every run used the
                # literal directory '/tmp/{uuid.uuid4()}/' instead of a
                # unique one.
                'PEX_ROOT': f'/tmp/{uuid.uuid4()}/',
                'PYTHONPATH': '.:',
            },
            files=files,
            instances=1)
        spec = skein.ApplicationSpec(
            {'HADOOP_ENV_CHECKER': service},
            acls=skein.model.ACLs(enable=True, view_users=['*']),
        )
        app = client.submit_and_connect(spec)

        logging.info('Remote check started')
        # Wait for the 'result' key to be available in the app's kv store.
        result = app.kv.wait('result').decode()
        app_id = app.id
        app.shutdown()
        return result == "True", app_id
Exemple #2
0
def _setup_task_env(
        tempdir: str,
        files: Dict[str, str] = None,
        env: Dict[str, str] = None,
        n_try: int = 0
):
    """Build the files mapping and environment variables for a task.

    Args:
        tempdir: Directory handed to ``cluster_pack.zip_path`` and
            ``_maybe_zip_task_files`` for the archives they create.
        files: Optional mapping of target name to local source path;
            ``None`` is treated as an empty mapping.
        env: Optional base environment. When omitted, a fresh dict is
            created for each call. A dict supplied by the caller is updated
            in place (``LIBHDFS_OPTS`` via ``_add_to_env`` and
            ``TF_YARN_N_TRY``), as in the original implementation.
        n_try: Attempt counter, exported as ``TF_YARN_N_TRY``.

    Returns:
        A ``(task_files, task_env)`` tuple of dicts.
    """
    # The former ``env={}`` default was a single dict shared by every call
    # that omitted ``env``; the mutations below then leaked from one call to
    # the next. Create a new dict per call instead.
    if env is None:
        env = {}

    task_files = _maybe_zip_task_files(files or {}, tempdir)
    # Ship this package itself, zipped, under its package name.
    task_files[__package__] = cluster_pack.zip_path(here, False, tempdir)

    _add_to_env(env, "LIBHDFS_OPTS", "-Xms64m -Xmx512m")

    env["TF_YARN_N_TRY"] = str(n_try)

    task_env = {
        **env,
        # Make Python modules/packages passed via ``files`` importable.
        "PYTHONPATH": ".:" + env.get("PYTHONPATH", ""),
        # Unique PEX root per call.
        "PEX_ROOT": os.path.join("/tmp", str(uuid.uuid4()))
    }

    if mlflow.use_mlflow:
        task_env["MLFLOW_RUN_ID"] = mlflow.active_run_id()
        task_env["MLFLOW_TRACKING_URI"] = mlflow.get_tracking_uri()
        task_env["GIT_PYTHON_REFRESH"] = "quiet"

    return task_files, task_env
Exemple #3
0
def _maybe_zip_task_files(files, tempdir):
    task_files = {}
    for target, source in files.items():
        assert target not in task_files
        if os.path.isdir(source):
            source = cluster_pack.zip_path(source, False, tempdir)

        task_files[target] = source
    return task_files
Exemple #4
0
def zip_path(py_dir: str,
             include_base_name=True,
             tmp_dir: str = _get_tmp_dir()):
    """Delegate to ``cluster_pack.zip_path`` and return its result.

    The three arguments are forwarded positionally and unchanged.

    Note: the default for ``tmp_dir`` is produced by ``_get_tmp_dir()`` once,
    when this function is defined, so every call that omits ``tmp_dir``
    receives that same value.
    """
    archive = cluster_pack.zip_path(py_dir, include_base_name, tmp_dir)
    return archive