def run(self) -> None:
    """Run the MLCube task on the remote host and copy the workspace back.

    The task is started over SSH: the remote python environment's activation command is chained with
    `cd <remote MLCube directory>` and `mlcube run ...`. Afterwards the remote `workspace/` directory is
    copied into the local MLCube root with `Shell.rsync_dirs`.

    Raises:
        ExecutionError: If the remote command fails or the workspace cannot be synced back.
    """
    connection: t.Text = self.get_connection_string()
    interpreter: PythonInterpreter = PythonInterpreter.create(self.mlcube.runner.interpreter)

    # MLCube root directory on the remote host: <remote_root>/<name of the local MLCube root directory>.
    mlcube_dir_name: t.Text = os.path.basename(self.mlcube.runtime.root)
    remote_path: t.Text = os.path.join(self.mlcube.runner.remote_root, mlcube_dir_name)

    try:
        mlcube_cmd = f"mlcube run --mlcube=. --platform={self.mlcube.runner.platform} --task={self.task}"
        remote_cmd = f'{interpreter.activate_cmd(noop=":")} && cd {remote_path} && {mlcube_cmd}'
        Shell.ssh(connection, remote_cmd)
    except ExecutionError as err:
        raise ExecutionError.mlcube_run_error(
            self.__class__.__name__,
            f"Error occurred while running MLCube task (name={self.task}).",
            **err.context
        )

    # Bring the results back to the local machine.
    # TODO: Only workspace/ directory is synced. Better solution?
    remote_workspace = f'{connection}:{remote_path}/workspace/'
    local_workspace = f'{self.mlcube.runtime.root}/workspace/'
    try:
        Shell.rsync_dirs(source=remote_workspace, dest=local_workspace)
    except ExecutionError as err:
        raise ExecutionError.mlcube_run_error(
            self.__class__.__name__,
            "Error occurred while syncing workspace.",
            **err.context
        )
def configure(self) -> None:
    """Run 'configure' phase for SSH runner.

    Steps, in order:
        1. Create the python environment on the remote host.
        2. Configure that python environment.
        3. Create the remote MLCube directory and copy the local MLCube root into it.
        4. Run `mlcube configure` on the remote host so that the runner used there configures itself.

    Raises:
        ExecutionError: If any of the steps above fails.
    """
    connection: t.Text = self.get_connection_string()
    interpreter: PythonInterpreter = PythonInterpreter.create(self.mlcube.runner.interpreter)
    runner_name = self.__class__.__name__

    # If required, create and then configure the python environment on the remote host. The command factories
    # are invoked lazily so that the second command is only built once the first step has succeeded.
    env_steps = (
        ("creating", interpreter.create_cmd),
        ("configuring", interpreter.configure_cmd),
    )
    for action, make_cmd in env_steps:
        try:
            Shell.ssh(connection, make_cmd())
        except ExecutionError as err:
            raise ExecutionError.mlcube_configure_error(
                runner_name,
                f"Error occurred while {action} remote python environment (env={interpreter}).",
                **err.context
            )

    # The 'local_path' and 'remote_path' must both be directories.
    local_path: str = self.mlcube.runtime.root
    remote_path: str = os.path.join(self.mlcube.runner.remote_root, os.path.basename(local_path))
    try:
        Shell.ssh(connection, f'mkdir -p {remote_path}')
        Shell.rsync_dirs(source=f'{local_path}/', dest=f'{connection}:{remote_path}/')
    except ExecutionError as err:
        raise ExecutionError.mlcube_configure_error(
            runner_name,
            "Error occurred while syncing local and remote folders.",
            **err.context
        )

    # Configure remote MLCube runner. The idea is that a chain of runners is used, for instance,
    # SSH runner -> Docker runner, so the runner used on the remote host must configure itself.
    try:
        mlcube_cmd = f"mlcube configure --mlcube=. --platform={self.mlcube.runner.platform}"
        remote_cmd = f'{interpreter.activate_cmd(noop=":")} && cd {remote_path} && {mlcube_cmd}'
        Shell.ssh(connection, remote_cmd)
    except ExecutionError as err:
        raise ExecutionError.mlcube_configure_error(
            runner_name,
            "Error occurred while configuring MLCube on a remote machine.",
            **err.context
        )
def create() -> None:
    """
    Create a new MLCube using cookiecutter template.
      - MLCube cookiecutter: https://github.com/mlcommons/mlcube_cookiecutter
      - Example: https://mlcommons.github.io/mlcube/tutorials/create-mlcube/

    If the `cookiecutter` library cannot be imported, installation instructions are printed and the function
    returns without doing anything else. Otherwise the template is rendered and, when the generated project
    directory contains an `mlcube.yaml` file, `mlcube describe` is run for it.
    """
    mlcube_cookiecutter_url = 'https://github.com/mlcommons/mlcube_cookiecutter'

    # Only the import itself is guarded: an ImportError raised later (while the template is rendered or
    # described) is a different failure and must not be reported as "library not found".
    try:
        from cookiecutter.main import cookiecutter
    except ImportError:
        print("Cookiecutter library not found.")
        print("\tInstall it: pip install cookiecutter")
        print(f"\tMore details: {mlcube_cookiecutter_url}")
        return

    proj_dir: str = cookiecutter(mlcube_cookiecutter_url)
    if proj_dir and os.path.isfile(os.path.join(proj_dir, 'mlcube.yaml')):
        Shell.run(['mlcube', 'describe', '--mlcube', proj_dir], on_error='die')
def configure(self) -> None:
    """Build Singularity Image on a current host.

    Nothing is built when the image file (`image_dir`/`image` from the runner configuration) already exists.
    Otherwise the image is built either from a docker source (`build_file` starts with `docker://` or
    `docker-archive:`) or from a recipe file located in the MLCube root directory.

    Raises:
        IOError: If `build_file` is a recipe file and it does not exist in the MLCube root directory.
        ExecutionError: If the singularity build command fails.
    """
    SingularityRun.check_install()
    s_cfg: DictConfig = self.mlcube.runner

    # Full path to the singularity image: <image_dir>/<image>.
    image_file = Path(s_cfg.image_dir, s_cfg.image)
    if image_file.exists():
        logger.info("SingularityRun SIF exists (%s) - no need to run the configure step.", image_file)
        return

    # Make sure a directory to store image exists. If paths are like "/opt/...", the call may fail.
    image_file.parent.mkdir(parents=True, exist_ok=True)

    # Let's assume that build context is the root MLCube directory.
    build_path = Path(self.mlcube.runtime.root)
    # This is the recipe file, or docker image.
    recipe: str = s_cfg.build_file
    if recipe.startswith(("docker://", "docker-archive:")):
        # https://sylabs.io/guides/3.0/user-guide/build_a_container.html
        # URI beginning with docker:// to build from Docker Hub
        logger.info("SingularityRun building SIF from docker image (%s).", recipe)
    else:
        # This must be a recipe file. Make sure it exists.
        if not Path(build_path, recipe).exists():
            raise IOError(f"SIF recipe file does not exist (path={build_path}, file={recipe})")
        logger.info("Building SIF from recipe file (path=%s, file=%s).", build_path, recipe)

    try:
        # '&&' (not ';') so that the build is not attempted from a wrong directory when 'cd' fails.
        Shell.run([
            'cd', str(build_path), '&&',
            s_cfg.singularity, 'build', s_cfg.build_args, str(image_file), recipe
        ])
    except ExecutionError as err:
        raise ExecutionError.mlcube_configure_error(
            self.__class__.__name__,
            "Error occurred while building SIF image. See context for more details.",
            **err.context
        ) from err
def test_run_01(self) -> None:
    """A successful command returns exit code 0 whichever `on_error` policy is requested.

    The command is given both as a string and as a list of arguments. The policies are the ones used by the
    tests and runners in this file ('die', 'raise', 'ignore'); none of them should trigger for a command
    that succeeds.
    """
    for cmd in ('python --version', ['python', '--version']):
        for on_error in ('die', 'raise', 'ignore'):
            # subTest keeps every (cmd, on_error) combination reported separately on failure.
            with self.subTest(cmd=cmd, on_error=on_error):
                exit_code = Shell.run(cmd, on_error=on_error)
                self.assertEqual(exit_code, 0, f"cmd = {cmd}, on_error = {on_error}")
def validate(mlcube: DictConfig) -> None:
    """Validate the runner section of an MLCube configuration and normalize its argument groups in place.

    Args:
        mlcube: MLCube configuration, possibly merged with user local configuration. Its `runner` section is
            checked and may be modified.

    The function returns nothing. `runner.build_args` and `runner.env_args` are replaced with the output of
    `Shell.to_cli_args` when they are given as `DictConfig` objects (using `--build-arg` and `-e` as parent
    arguments respectively); any other value is left untouched.
    """
    # Check runner keys against the default configuration and the type of the mandatory parameters.
    required_str_params = ['image', 'docker', 'build_strategy']
    Validate(mlcube.runner, 'runner').check_unknown_keys(Config.DEFAULT.keys()).check_values(
        required_str_params, str, blanks=False
    )
    Config.BuildStrategy.validate(mlcube.runner.build_strategy)

    # (parameter name, CLI flag that prefixes every key/value pair of that parameter)
    cli_groups = (
        ('build_args', '--build-arg'),
        ('env_args', '-e'),
    )
    for param_name, cli_flag in cli_groups:
        if isinstance(getattr(mlcube.runner, param_name), DictConfig):
            cli_args = Shell.to_cli_args(getattr(mlcube.runner, param_name), parent_arg=cli_flag)
            setattr(mlcube.runner, param_name, cli_args)
def test_run_02(self) -> None:
    """Every failing command must yield a positive exit code when errors are ignored."""
    failing_cmds = (
        # Python script that references an undefined name.
        'python -c "print(message)"',
        # Python process that sends SIGUSR1 to itself.
        'python -c "import os, signal; os.kill(os.getpid(), signal.SIGUSR1);"',
        # Presumably a command name that does not exist on the host.
        '8389dfb48c6f4a1aaa16bdda76c1fb11',
    )
    for cmd in failing_cmds:
        self.assertGreater(Shell.run(cmd, on_error='ignore'), 0, f"cmd = {cmd}")
def configure(self) -> None:
    """Build Docker image on a current host, or pull it.

    The image is pulled when the build strategy is `pull` or when the build recipe does not exist; in every
    other case it is built from the recipe.

    Raises:
        ExecutionError: If the docker pull or docker build command fails.
    """
    runner = self.mlcube.runner
    image: t.Text = runner.image
    context: t.Text = os.path.abspath(os.path.join(self.mlcube.runtime.root, runner.build_context))
    recipe: t.Text = os.path.abspath(os.path.join(context, runner.build_file))
    docker: t.Text = runner.docker

    # Build strategies: `pull`, `auto` and `always`.
    build_strategy: t.Text = runner.build_strategy
    recipe_found: bool = os.path.exists(recipe)
    must_pull: bool = build_strategy == Config.BuildStrategy.PULL or not recipe_found

    if not must_pull:
        logger.info(
            "Will build image (%s) because (build_strategy=%s, build_recipe_exists=%r)",
            image, build_strategy, recipe_found
        )
        build_args: t.Text = runner.build_args
        try:
            Shell.run([docker, 'build', build_args, '-t', image, '-f', recipe, context])
        except ExecutionError as err:
            raise ExecutionError.mlcube_configure_error(
                self.__class__.__name__,
                f"Error occurred while building docker image (docker={docker}, build_args={build_args}, "
                f"image={image}, recipe={recipe}, context={context}).",
                **err.context
            )
        return

    logger.info(
        "Will pull image (%s) because (build_strategy=%s, build_recipe_exists=%r)",
        image, build_strategy, recipe_found
    )
    if recipe_found:
        # The recipe is there, so pulling is the user's explicit choice - remind them about it.
        logger.warning(
            "Docker recipe exists (%s), but your build strategy is `%s`, and so the image will be pulled, not "
            "built. Make sure your image is up-to-date with your source code. If you want to rebuilt MLCube "
            "docker image locally, rerun with `-Prunner.build_strategy=always`.",
            recipe, build_strategy
        )
    try:
        Shell.run([docker, 'pull', image])
    except ExecutionError as err:
        description = f"Error occurred while pulling docker image (docker={docker}, image={image})."
        if recipe_found:
            description += (
                f" By the way, docker recipe ({recipe}) exists, but your build strategy is set to "
                "pull. Consider rerunning with: `-Prunner.build_strategy=auto` to build image locally."
            )
        raise ExecutionError.mlcube_configure_error(self.__class__.__name__, description, **err.context)
def run(self) -> None:
    """Run a cube.

    Steps, in order:
        1. Run the 'configure' phase when the build strategy is 'always' or the docker image does not exist.
        2. Sync the MLCube workspace for the task.
        3. Generate mount points and task arguments.
        4. Execute `docker run` with CPU or GPU run arguments, depending on the platform accelerator count.

    Raises:
        ExecutionError: If the workspace sync, the mount point generation or the docker run command fails
            (errors raised by the 'configure' phase propagate unchanged).
    """
    runner_name = self.__class__.__name__
    docker: t.Text = self.mlcube.runner.docker
    image: t.Text = self.mlcube.runner.image
    build_strategy: t.Text = self.mlcube.runner.build_strategy

    if build_strategy == Config.BuildStrategy.ALWAYS or not Shell.docker_image_exists(docker, image):
        logger.warning(
            "Docker image (%s) does not exist or build strategy is 'always'. "
            "Will run 'configure' phase.",
            image
        )
        self.configure()

    # Deal with user-provided workspace
    try:
        Shell.sync_workspace(self.mlcube, self.task)
    except Exception as err:
        raise ExecutionError.mlcube_run_error(
            runner_name,
            f"Error occurred while syncing MLCube workspace (task={self.task}). Actual error is {type(err)} - see "
            "context for details.",
            error=str(err)
        )

    # The 'mounts' dictionary maps host paths to container paths
    try:
        mounts, task_args = Shell.generate_mounts_and_args(self.mlcube, self.task)
    except ConfigurationError as err:
        raise ExecutionError.mlcube_run_error(
            runner_name,
            f"Error occurred while generating mount points for docker run command (task={self.task}). See context "
            "for details and check your MLCube configuration file.",
            error=str(err)
        )
    logger.info(f"mounts={mounts}, task_args={task_args}")

    volumes = Shell.to_cli_args(mounts, sep=':', parent_arg='--volume')
    env_args = self.mlcube.runner.env_args

    # A missing (or falsy) accelerator count means a CPU-only run.
    num_gpus: int = self.mlcube.platform.get('accelerator_count', None) or 0
    if num_gpus == 0:
        run_args: t.Text = self.mlcube.runner.cpu_args
    else:
        run_args = self.mlcube.runner.gpu_args

    docker_cmd = [docker, 'run', run_args, env_args, volumes, image, ' '.join(task_args)]
    try:
        Shell.run(docker_cmd)
    except ExecutionError as err:
        raise ExecutionError.mlcube_run_error(
            runner_name,
            f"Error occurred while running MLCube task (docker={docker}, run_args={run_args}, env_args={env_args}, "
            f"volumes={volumes}, image={image}, task_args={task_args}).",
            **err.context
        )
def run(self) -> None:
    """Run the MLCube task in a Singularity container.

    When the image file (`image_dir`/`image` from the runner configuration) does not exist, the 'configure'
    phase is run first; otherwise only the singularity installation is checked. Then the workspace is
    synced, mount points and task arguments are generated, and `singularity run` is executed.

    Raises:
        ExecutionError: If the workspace sync, the mount point generation or the singularity run command
            fails (errors raised by the 'configure' phase propagate unchanged).
    """
    runner_name = self.__class__.__name__
    image_file = Path(self.mlcube.runner.image_dir) / self.mlcube.runner.image
    if image_file.exists():
        SingularityRun.check_install()
    else:
        self.configure()

    # Deal with user-provided workspace
    try:
        Shell.sync_workspace(self.mlcube, self.task)
    except Exception as err:
        raise ExecutionError.mlcube_run_error(
            runner_name,
            "Error occurred while syncing MLCube workspace. See context for more details.",
            error=str(err)
        )

    try:
        mounts, task_args = Shell.generate_mounts_and_args(self.mlcube, self.task)
        logger.info(f"mounts={mounts}, task_args={task_args}")
    except ConfigurationError as err:
        raise ExecutionError.mlcube_run_error(
            runner_name,
            "Error occurred while generating mount points for singularity run command. See context for more "
            "details and check your MLCube configuration file.",
            error=str(err)
        )

    # Every mount becomes a `--bind` argument with ':' between its two paths.
    volumes = Shell.to_cli_args(mounts, sep=":", parent_arg="--bind")
    try:
        singularity_cmd = [
            self.mlcube.runner.singularity,
            'run',
            self.mlcube.runner.run_args,
            volumes,
            str(image_file),
            ' '.join(task_args),
        ]
        Shell.run(singularity_cmd)
    except ExecutionError as err:
        raise ExecutionError.mlcube_run_error(
            runner_name,
            f"Error occurred while running MLCube task (task={self.task}). See context for more details.",
            **err.context
        )
def test_run_03(self) -> None:
    """A failing command must raise `ExecutionError` when `on_error` is 'raise'."""
    failing_cmd = 'python -c "print(message)"'
    self.assertRaises(ExecutionError, Shell.run, failing_cmd, on_error='raise')