def test_upload_env():
    """Check that an out-of-date archive is re-packed and then copied.

    ``_is_archive_up_to_date`` is mocked to return ``False`` so that
    ``upload_env_to_hdfs`` has to regenerate the archive. The test then
    verifies that the packer received the non-editable requirements as a
    ``name -> version`` mapping and that ``tf.gfile.Copy`` was called once
    with ``overwrite=True``.
    """
    packer = mock.MagicMock(spec=packaging.Packer)
    with contextlib.ExitStack() as stack:

        def patch(target):
            # Patch one attribute of the module under test; the patch is
            # undone when the surrounding ExitStack closes.
            return stack.enter_context(
                mock.patch(f"{MODULE_TO_TEST}.{target}"))

        # Mock every collaborator of upload_env_to_hdfs.
        up_to_date = patch("_is_archive_up_to_date")
        non_editable_reqs = patch("get_non_editable_requirements")
        tf_mock = patch("tf")
        patch("_dump_archive_metadata")
        patch("shutil.rmtree")

        # Force the archive to be regenerated.
        up_to_date.return_value = False
        non_editable_reqs.return_value = [
            {"name": "a", "version": "1.0"},
            {"name": "b", "version": "2.0"},
        ]
        packer.pack.return_value = MYARCHIVE_FILENAME
        packer.extension = "pex"

        packaging.upload_env_to_hdfs(MYARCHIVE_FILENAME, packer)

        expected_reqs = {"a": "1.0", "b": "2.0"}
        packer.pack.assert_called_once_with(output=Any(str),
                                            reqs=expected_reqs)
        tf_mock.gfile.Copy.assert_called_once_with(
            MYARCHIVE_FILENAME, MYARCHIVE_FILENAME, overwrite=True)
def test_upload_env_to_hdfs_in_a_pex():
    """Check the upload path taken when the code runs from inside a pex.

    ``get_current_pex_filepath`` is mocked to return a local pex path. The
    test expects the target directory to be created, the local pex to be
    copied to the requested HDFS location, the pre-existing metadata file
    to be removed, and the returned env name to be ``'myapp'``.
    """
    local_home = '/home/j.doe'
    hdfs_home = '/user/j.doe'
    local_pex = f'{local_home}/myapp.pex'
    hdfs_pex = f'{hdfs_home}/blah.pex'
    hdfs_metadata = f"{hdfs_home}/blah.json"

    with mock.patch(f"{MODULE_TO_TEST}.get_current_pex_filepath") as pex_filepath, \
            mock.patch(f"{MODULE_TO_TEST}.tf") as tf_mock, \
            mock.patch(f"{MODULE_TO_TEST}._get_archive_metadata_path") as metadata_path:
        metadata_path.return_value = hdfs_metadata
        pex_filepath.return_value = local_pex
        # Metadata already exists on hdfs
        tf_mock.gfile.Exists.return_value = True

        result = packaging.upload_env_to_hdfs(hdfs_pex)

        tf_mock.gfile.MakeDirs.assert_called_once_with(hdfs_home)
        tf_mock.gfile.Copy.assert_called_once_with(
            local_pex, hdfs_pex, overwrite=True)
        # The stale metadata file must have been cleaned up.
        tf_mock.gfile.Remove.assert_called_once_with(hdfs_metadata)
        # The env name is the second element of the returned value.
        assert 'myapp' == result[1]
def upload_pex(
    path_pex: str,
    path_pex_existing: str = None,
    additional_packages: Dict = None,
    ignored_packages: List = None
) -> str:
    """Make a PEX available and return the path that should be used for it.

    Three cases are handled:

    * no existing PEX is given: the current environment is packed with the
      PEX packer and uploaded to ``path_pex``;
    * an existing PEX is given and ``Path(path_pex_existing).is_hdfs`` is
      falsy: that file is uploaded to ``path_pex``;
    * otherwise nothing is uploaded and ``path_pex_existing`` is returned.

    Args:
        path_pex: destination of the upload.
        path_pex_existing: optional path of an already built PEX.
        additional_packages: forwarded to ``upload_env_to_hdfs``; an empty
            dict is used when falsy.
        ignored_packages: forwarded to ``upload_env_to_hdfs``; an empty
            list is used when falsy.

    Returns:
        ``path_pex`` when an upload happened, ``path_pex_existing`` otherwise.
    """
    if path_pex_existing is None:
        # Nothing was built beforehand: pack the current environment.
        LOGGER.info(f"Uploading env to {path_pex}")
        packaging.upload_env_to_hdfs(
            archive_on_hdfs=path_pex,
            additional_packages=additional_packages or {},
            ignored_packages=ignored_packages or [],
            packer=cluster_pack.packaging.PEX_PACKER,
        )
        return path_pex

    if Path(path_pex_existing).is_hdfs:
        # The given PEX is reused as is.
        LOGGER.info(f"Skipping upload, PEX {path_pex_existing} already exists")
        return path_pex_existing

    # An existing PEX that still has to be pushed to the requested location.
    LOGGER.info(f"Uploading env to {path_pex}")
    packaging.upload_zip_to_hdfs(path_pex_existing, archive_on_hdfs=path_pex)
    return path_pex
def main():
    """Compute the mean of a random vector across two remote tasks.

    The current environment is uploaded, a cluster with two instances of
    ``NODE_NAME`` is started through ``standalone_client_mode``, and a graph
    is built in which each task averages one half of a 10000-element
    placeholder. The two partial means are averaged, evaluated in a session
    connected to the first task of the cluster spec, and printed.
    """
    archive_path, _ = packaging.upload_env_to_hdfs()
    editable_reqs = packaging.get_editable_requirements_from_current_venv()
    session_config = tf.ConfigProto(operation_timeout_in_ms=300000)

    task_specs = {
        NODE_NAME: TaskSpec(memory="2 GiB", vcores=4, instances=2)
    }
    shipped_files = {**editable_reqs}
    acls = skein.model.ACLs(enable=True, view_users=['*'])

    with standalone_client_mode(
            archive_path,
            task_specs=task_specs,
            tf_session_config=session_config,
            files=shipped_files,
            acls=acls) as cluster_spec:
        size = 10000
        x = tf.placeholder(tf.float32, size)

        # Task 1 averages the elements from index 5000 to the end.
        with tf.device(f"/job:{NODE_NAME}/task:1"), \
                tf.name_scope("scope_of_task1"):
            upper_half = tf.slice(x, [5000], [-1])
            mean1 = tf.reduce_mean(upper_half)

        # Task 0 averages the first 5000 elements.
        with tf.device(f"/job:{NODE_NAME}/task:0"), \
                tf.name_scope("scope_of_task0"):
            lower_half = tf.slice(x, [0], [5000])
            mean2 = tf.reduce_mean(lower_half)

        mean = (mean1 + mean2) / 2

        cluster_spec_dict = cluster_spec.as_dict()
        # Address of the first task of the first job listed in the spec.
        first_job_tasks = next(iter(cluster_spec_dict.values()))
        first_task = first_job_tasks[0]
        logger.info("cluster_spec:" + str(cluster_spec_dict))
        logger.info("connecting to target:" + first_task)

        with tf.Session(f"grpc://{first_task}", config=session_config) as sess:
            feed = {x: np.random.random(size)}
            result = sess.run(mean, feed_dict=feed)
            print(f"mean = {result}")
def launch_remote_check(file: str) -> Tuple[bool, str]:
    """Run ``check_hadoop_env.py`` in a skein application and report the outcome.

    The current environment is uploaded as a PEX, then a single-instance
    skein service is submitted that runs this script from the uploaded
    archive. The function blocks until the application publishes the
    ``result`` key in its key-value store, then shuts the application down.

    Args:
        file: value passed to the remote script through ``--file``.

    Returns:
        A tuple ``(success, app_id)`` where ``success`` is ``True`` when the
        remote result is the string ``"True"`` and ``app_id`` is the id of
        the submitted application.
    """
    # Local import so the block does not depend on a file-level import.
    import uuid

    logging.info('Launching remote check')
    zip_hdfs, _ = packaging.upload_env_to_hdfs(packer=packaging.PEX_PACKER)
    archive_name = os.path.basename(zip_hdfs)
    with skein.Client() as client:
        files = {
            archive_name: zip_hdfs,
            'check_hadoop_env.py': __file__,
        }
        editable_packages = packaging.get_editable_requirements_from_current_venv()
        if 'tf_yarn' in editable_packages:
            # tf_yarn is installed in editable mode: ship a zip of its sources.
            tf_yarn_zip = packaging.zip_path(editable_packages['tf_yarn'], False)
            logger.info(f"zip path for editable tf_yarn is {tf_yarn_zip}")
            files.update({'tf_yarn': tf_yarn_zip})
        service = skein.Service(
            script=f'./{archive_name} check_hadoop_env.py --file {file}',
            resources=skein.Resources(2*1024, 1),
            env={
                # Fix: this must be an f-string. Without the prefix every run
                # used the literal directory '/tmp/{uuid.uuid4()}/' instead
                # of a unique PEX root per launch.
                'PEX_ROOT': f'/tmp/{uuid.uuid4()}/',
                'PYTHONPATH': '.:',
            },
            files=files,
            instances=1
        )
        spec = skein.ApplicationSpec(
            {'HADOOP_ENV_CHECKER': service},
            acls=skein.model.ACLs(
                enable=True,
                view_users=['*']
            ),
        )
        app = client.submit_and_connect(spec)

        logging.info('Remote check started')
        # Blocks until the remote script writes the 'result' key.
        result = app.kv.wait('result').decode()
        app_id = app.id
        app.shutdown()
        return result == "True", app_id
def test_upload_env_to_hdfs_should_throw_error_if_wrong_extension():
    """Uploading ``myarchive.tar.gz`` with the conda packer must raise ``ValueError``."""
    archive_with_wrong_extension = "myarchive.tar.gz"
    with pytest.raises(ValueError):
        packaging.upload_env_to_hdfs(
            archive_with_wrong_extension,
            packer=packaging.CONDA_PACKER,
        )
def main():
    """Train a small keras classifier on the wine quality data with ``run_on_yarn``.

    The experiment wraps a three-layer dense keras model converted to an
    estimator. The current environment is uploaded and the experiment is
    launched with one chief, four workers, two parameter servers and one
    evaluator.
    """

    def experiment_fn() -> Experiment:
        """Build the estimator together with its train and eval specs."""
        train_data, test_data = winequality.get_train_eval_datasets(WINE_EQUALITY_FILE)

        def convert_to_tensor(x, y):
            # Features become a float32 tensor, the label an int32 tensor.
            features = tf.convert_to_tensor(list(x.values()), dtype=tf.float32)
            label = tf.convert_to_tensor(y, dtype=tf.int32)
            return (features, label)

        def train_input_fn():
            # Shuffled batches of 128, repeated indefinitely.
            dataset = train_data.map(convert_to_tensor)
            dataset = dataset.shuffle(1000).batch(128).repeat()
            return dataset.make_one_shot_iterator().get_next()

        def eval_input_fn():
            # Same pipeline as for training, but a single pass over the data.
            dataset = test_data.map(convert_to_tensor)
            dataset = dataset.shuffle(1000).batch(128)
            return dataset.make_one_shot_iterator().get_next()

        model = keras.Sequential()
        model.add(keras.layers.Dense(units=300, activation="relu", input_shape=(11,)))
        model.add(keras.layers.Dense(units=100, activation="relu"))
        model.add(keras.layers.Dense(units=10, activation="softmax"))
        model.summary()
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer="sgd",
                      metrics=['accuracy'])

        run_config = tf.estimator.RunConfig(model_dir=HDFS_DIR)
        estimator = tf.keras.estimator.model_to_estimator(model, config=run_config)
        train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=1000)
        eval_spec = tf.estimator.EvalSpec(
            eval_input_fn,
            steps=10,
            start_delay_secs=0,
            throttle_secs=30)
        return Experiment(estimator, train_spec, eval_spec)

    # Call experiment_fn once locally to force
    # model_to_estimator._save_first_checkpoint (l457 of
    # https://github.com/tensorflow/estimator/blob/ \
    # 1d55f01d8af871a35ef83fc3354b9feaa671cbe1/tensorflow_estimator/python/estimator/keras.py).
    # Otherwise there is a race condition when all workers try to save the
    # first checkpoint at the same time.
    experiment_fn()

    archive_path, _ = packaging.upload_env_to_hdfs()
    editable_reqs = packaging.get_editable_requirements_from_current_venv()

    task_specs = {
        "chief": TaskSpec(memory="2 GiB", vcores=4),
        "worker": TaskSpec(memory="2 GiB", vcores=4, instances=4),
        "ps": TaskSpec(memory="2 GiB", vcores=4, instances=2),
        "evaluator": TaskSpec(memory="2 GiB", vcores=1)
    }
    # Editable packages plus the winequality module, keyed by its file name.
    shipped_files = {
        **editable_reqs,
        os.path.basename(winequality.__file__): winequality.__file__,
    }
    acls = skein.model.ACLs(enable=True, view_users=['*'])

    run_on_yarn(
        archive_path,
        experiment_fn,
        task_specs=task_specs,
        files=shipped_files,
        acls=acls
    )
predictions={"x": x}, eval_metric_ops={}) def experiment_fn() -> Experiment: def input_fn(): x = tf.constant([[1.0], [2.0], [3.0], [4.0]]) return {"x": x}, x estimator = tf.estimator.Estimator(model_fn=model_fn) train_spec = tf.estimator.TrainSpec(input_fn, max_steps=1) eval_spec = tf.estimator.EvalSpec(input_fn, steps=1) return Experiment(estimator, train_spec, eval_spec) if __name__ == "__main__": pyenv_zip_path, env_name = packaging.upload_env_to_hdfs() editable_requirements = packaging.get_editable_requirements_from_current_venv( ) # skein.Client is useful when multiple learnings run in parallel # and share one single skein JAVA process with skein.Client() as client: run_on_yarn(pyenv_zip_path, experiment_fn, task_specs={"chief": TaskSpec(memory=64, vcores=1)}, files={ **editable_requirements, }, acls=skein.model.ACLs(enable=True, view_users=['*']), skein_client=client)