def init_zoo(args):
    """Generate a Dockerfile for packaging the model zoo into an image.

    Optionally copies the user-provided cluster spec file next to the
    Dockerfile so it can be COPY'd into the image, then renders the
    Dockerfile template with the CLI arguments and writes it to
    ``./Dockerfile``.

    Args:
        args: Parsed CLI arguments providing ``cluster_spec``,
            ``base_image``, ``extra_pypi_index``, ``local_pkg_dir``
            and ``model_zoo``.

    Raises:
        RuntimeError: If ``args.cluster_spec`` points to a path that
            does not exist.
    """
    logger.info("Create the Dockerfile for the model zoo.")

    # Stage the cluster spec file in the build context (the current
    # directory) when one was supplied.
    spec_path = args.cluster_spec
    spec_name = None
    if spec_path:
        if not os.path.exists(spec_path):
            raise RuntimeError(
                "The cluster spec {} doesn't exist".format(spec_path)
            )
        try:
            shutil.copy2(spec_path, os.getcwd())
        except shutil.SameFileError:
            # Already running from the directory holding the spec.
            pass
        spec_name = os.path.basename(spec_path)

    # Dockerfile template; rendered below with the CLI arguments.
    tmpl_str = """\
FROM {{ BASE_IMAGE }} as base {% if LOCAL_PKG_DIR %}\
COPY {{LOCAL_PKG_DIR}}/*.whl / RUN pip install /*.whl --extra-index-url={{ EXTRA_PYPI_INDEX }} && rm /*.whl {% else %} RUN pip install elasticdl_preprocessing\
--extra-index-url={{ EXTRA_PYPI_INDEX }} RUN pip install elasticdl --extra-index-url={{ EXTRA_PYPI_INDEX }} {% endif -%} RUN /bin/bash -c\
'PYTHON_PKG_PATH=$(pip3 show elasticdl | grep "Location:" | cut -d " " -f2);\
echo "PATH=${PYTHON_PKG_PATH}/elasticdl/go/bin:$PATH" >>\
/root/.bashrc_elasticdl;\
echo ". /root/.bashrc_elasticdl" >> /root/.bashrc' COPY {{MODEL_ZOO_PATH}} /model_zoo RUN pip install -r /model_zoo/requirements.txt\
--extra-index-url={{ EXTRA_PYPI_INDEX }} {% if CLUSTER_SPEC_NAME %}\
COPY ./{{ CLUSTER_SPEC_NAME }} {{CLUSTER_SPEC_DIR}}/{{ CLUSTER_SPEC_NAME }}\
{% endif %} """

    rendered = Template(tmpl_str).render(
        BASE_IMAGE=args.base_image,
        EXTRA_PYPI_INDEX=args.extra_pypi_index,
        CLUSTER_SPEC_NAME=spec_name,
        LOCAL_PKG_DIR=args.local_pkg_dir,
        CLUSTER_SPEC_DIR=ClusterSpecConfig.CLUSTER_SPEC_DIR,
        MODEL_ZOO_PATH=args.model_zoo,
    )
    with open("./Dockerfile", mode="w") as dockerfile:
        dockerfile.write(rendered)
def _submit_job(image_name, client_args, container_args):
    """Submit an ElasticDL job by launching (or dumping YAML for) the
    master pod.

    Builds a single ``bash -c`` command that runs the ElasticDL master
    module with the wrapped container arguments, then either dumps the
    master pod spec to a YAML file (``client_args.yaml``) or creates
    the master pod directly.

    Args:
        image_name: Docker image to run the master pod with.
        client_args: Parsed client CLI arguments (namespace, job name,
            resources, volumes, envs, ...).
        container_args: Arguments forwarded to the master process.
    """
    client = k8s.Client(
        image_name=image_name,
        namespace=client_args.namespace,
        job_name=client_args.job_name,
        cluster_spec=client_args.cluster_spec,
        cluster_spec_json=client_args.cluster_spec_json,
        force_use_kube_config_file=client_args.force_use_kube_config_file,
    )

    # Assemble the shell command line that starts the master process.
    wrapped = wrap_python_args_with_string(container_args)
    launch_cmd = (
        BashCommandTemplate.SET_PIPEFAIL
        + " python -m elasticdl.python.master.main"
    )
    pieces = [launch_cmd] + wrapped
    if client_args.log_file_path:
        # Tee/redirect the master's output into the requested log file.
        pieces.append(
            BashCommandTemplate.REDIRECTION.format(client_args.log_file_path)
        )
    shell_args = ["-c", " ".join(pieces)]

    # Keyword arguments shared by both the YAML-dump and the
    # create-master code paths.
    master_kwargs = dict(
        resource_requests=client_args.master_resource_request,
        resource_limits=client_args.master_resource_limit,
        args=shell_args,
        pod_priority=client_args.master_pod_priority,
        image_pull_policy=client_args.image_pull_policy,
        restart_policy=client_args.restart_policy,
        volume=client_args.volume,
        envs=parse_envs(client_args.envs),
    )

    if client_args.yaml:
        client.dump_master_yaml(yaml=client_args.yaml, **master_kwargs)
        logger.info(
            "ElasticDL job %s YAML has been dumped into file %s."
            % (client_args.job_name, client_args.yaml)
        )
    else:
        client.create_master(**master_kwargs)
        logger.info(
            "ElasticDL job %s was successfully submitted. "
            "The master pod is: %s."
            % (client_args.job_name, client.get_master_pod_name())
        )
def build_zoo(args):
    """Build the model zoo Docker image from the generated Dockerfile.

    Streams the docker build output and prints each progress update.

    Args:
        args: Parsed CLI arguments providing ``path`` (build context)
            and ``image`` (tag for the built image).
    """
    logger.info("Build the image for the model zoo.")
    docker_client = docker.DockerClient.from_env()
    build_stream = docker_client.api.build(
        dockerfile="./Dockerfile",
        path=args.path,
        rm=True,
        tag=args.image,
        decode=True,
    )
    for update in build_stream:
        _print_docker_progress(update)
def __init__(
    self,
    *,
    image_name,
    namespace,
    job_name,
    cluster_spec="",
    force_use_kube_config_file=False
):
    """
    ElasticDL k8s client.

    Args:
        image_name: Docker image path for ElasticDL pod.
        namespace: The name of the Kubernetes namespace where ElasticDL
            pods will be created.
        job_name: ElasticDL job name, should be unique in the namespace.
            Used as pod name prefix and value for "elasticdl" label.
        cluster_spec: Path to a Python module describing the cluster.
            When non-empty, the module is loaded and its ``cluster``
            attribute is stored on this client; otherwise
            ``self.cluster`` stays ``None``.
        force_use_kube_config_file: If true, force to load the cluster
            config from ~/.kube/config. Otherwise, if it's in a process
            running in a K8S environment, it loads the incluster config,
            if not, it loads the kube config file.

    Raises:
        Exception: If neither the in-cluster config nor the kube config
            file could be loaded.
    """
    try:
        if (
            os.getenv("KUBERNETES_SERVICE_HOST")
            and not force_use_kube_config_file
        ):
            # We are running inside a k8s cluster
            config.load_incluster_config()
            logger.info("Load the incluster config.")
        else:
            # Use user's kube config
            config.load_kube_config()
            logger.info("Load the kube config file.")
    except Exception as ex:
        traceback.print_exc()
        # Chain the original exception so callers keep the root cause
        # instead of only the formatted message.
        raise Exception(
            "Failed to load configuration for Kubernetes:\n%s" % str(ex)
        ) from ex

    self.client = client.CoreV1Api()
    self.namespace = namespace
    self.job_name = job_name
    self._image_name = image_name
    self.cluster = None
    if cluster_spec:
        cluster_spec_module = load_module(cluster_spec)
        self.cluster = cluster_spec_module.cluster
def init_zoo(args):
    """Generate a Dockerfile for packaging the model zoo into an image.

    Optionally copies the user-provided cluster spec file next to the
    Dockerfile so the image build can COPY it, then renders the
    Dockerfile template with the CLI arguments and writes the result to
    ``./Dockerfile``.

    Args:
        args: Parsed CLI arguments providing ``cluster_spec``,
            ``base_image`` and ``extra_pypi_index``.

    Raises:
        RuntimeError: If ``args.cluster_spec`` points to a path that
            does not exist.
    """
    logger.info("Create the Dockerfile for the model zoo.")

    # Copy cluster spec file to the current directory if specified
    cluster_spec_path = args.cluster_spec
    cluster_spec_name = None
    if cluster_spec_path:
        if not os.path.exists(cluster_spec_path):
            raise RuntimeError(
                "The cluster spec {} doesn't exist".format(cluster_spec_path)
            )
        # Bug fix: copy2 raises SameFileError when the command is run
        # from the directory that already contains the spec; ignore it,
        # matching the other init_zoo variant in this file.
        try:
            shutil.copy2(cluster_spec_path, os.getcwd())
        except shutil.SameFileError:
            pass
        cluster_spec_name = os.path.basename(cluster_spec_path)

    # Create the docker file
    # Build the content from the template and arguments
    tmpl_str = """\
FROM {{ BASE_IMAGE }} as base RUN pip install elasticdl_preprocessing\
--extra-index-url={{ EXTRA_PYPI_INDEX }} RUN pip install elasticdl --extra-index-url={{ EXTRA_PYPI_INDEX }} ENV PATH /usr/local/lib/python3.6/dist-packages/elasticdl/go/bin:$PATH COPY . /model_zoo RUN pip install -r /model_zoo/requirements.txt\
--extra-index-url={{ EXTRA_PYPI_INDEX }} {% if CLUSTER_SPEC_NAME %}\
COPY ./{{ CLUSTER_SPEC_NAME }} /cluster_spec/{{ CLUSTER_SPEC_NAME }}\
{% endif %} """
    template = Template(tmpl_str)
    docker_file_content = template.render(
        BASE_IMAGE=args.base_image,
        EXTRA_PYPI_INDEX=args.extra_pypi_index,
        CLUSTER_SPEC_NAME=cluster_spec_name,
    )

    with open("./Dockerfile", mode="w") as f:
        f.write(docker_file_content)
def delete_master(self):
    """Delete the master pod of this job after logging its name."""
    master_pod = self.get_master_pod_name()
    logger.info("pod name is %s" % master_pod)
    self.delete_pod(master_pod)
def create_master(self, **kargs):
    """Create the master pod in this client's namespace.

    Args:
        **kargs: Keyword arguments forwarded to the master pod spec
            builder (resources, priority, volumes, envs, ...).
    """
    pod_spec = self._create_master_pod_obj(**kargs)
    self.client.create_namespaced_pod(self.namespace, pod_spec)
    logger.info("Master launched.")
def push_zoo(args):
    """Push the model zoo image to its remote registry.

    Streams the docker push output and prints each progress update.

    Args:
        args: Parsed CLI arguments providing ``image`` (the tag to push).
    """
    logger.info("Push the image for the model zoo.")
    docker_client = docker.DockerClient.from_env()
    push_stream = docker_client.api.push(args.image, stream=True, decode=True)
    for update in push_stream:
        _print_docker_progress(update)