Esempio n. 1
0
def init_zoo(args):
    """Generate ``./Dockerfile`` for the model zoo image.

    When ``args.cluster_spec`` is set, the spec file is first copied into the
    current working directory and a ``COPY`` instruction for it is rendered
    into the Dockerfile.

    Args:
        args: Parsed arguments. The attributes read here are
            ``cluster_spec``, ``base_image``, ``extra_pypi_index``,
            ``local_pkg_dir`` and ``model_zoo``.

    Raises:
        RuntimeError: If ``args.cluster_spec`` is set but the path does not
            exist.
    """
    logger.info("Create the Dockerfile for the model zoo.")

    # Stage the cluster spec (if any) next to the Dockerfile.
    spec_source = args.cluster_spec
    spec_file_name = None
    if spec_source:
        if not os.path.exists(spec_source):
            raise RuntimeError(
                "The cluster spec {} doesn't exist".format(spec_source)
            )
        try:
            shutil.copy2(spec_source, os.getcwd())
        except shutil.SameFileError:
            # The spec already lives in the current directory.
            pass
        spec_file_name = os.path.basename(spec_source)

    # Jinja template of the Dockerfile. The trailing backslashes are Python
    # line continuations inside the literal, so they join the adjacent lines.
    dockerfile_template = """\
FROM {{ BASE_IMAGE }} as base

{% if LOCAL_PKG_DIR %}\
COPY {{LOCAL_PKG_DIR}}/*.whl /
RUN pip install /*.whl --extra-index-url={{ EXTRA_PYPI_INDEX }} && rm /*.whl
{% else %}
RUN pip install elasticdl_preprocessing\
 --extra-index-url={{ EXTRA_PYPI_INDEX }}

RUN pip install elasticdl --extra-index-url={{ EXTRA_PYPI_INDEX }}
{% endif -%}

RUN /bin/bash -c\
 'PYTHON_PKG_PATH=$(pip3 show elasticdl | grep "Location:" | cut -d " " -f2);\
 echo "PATH=${PYTHON_PKG_PATH}/elasticdl/go/bin:$PATH" >>\
 /root/.bashrc_elasticdl;\
 echo ". /root/.bashrc_elasticdl" >> /root/.bashrc'

COPY {{MODEL_ZOO_PATH}} /model_zoo
RUN pip install -r /model_zoo/requirements.txt\
 --extra-index-url={{ EXTRA_PYPI_INDEX }}

{% if CLUSTER_SPEC_NAME  %}\
COPY ./{{ CLUSTER_SPEC_NAME }} {{CLUSTER_SPEC_DIR}}/{{ CLUSTER_SPEC_NAME }}\
{% endif %}
"""
    rendered = Template(dockerfile_template).render(
        BASE_IMAGE=args.base_image,
        EXTRA_PYPI_INDEX=args.extra_pypi_index,
        CLUSTER_SPEC_NAME=spec_file_name,
        LOCAL_PKG_DIR=args.local_pkg_dir,
        CLUSTER_SPEC_DIR=ClusterSpecConfig.CLUSTER_SPEC_DIR,
        MODEL_ZOO_PATH=args.model_zoo,
    )

    with open("./Dockerfile", mode="w") as dockerfile:
        dockerfile.write(rendered)
Esempio n. 2
0
def _submit_job(image_name, client_args, container_args):
    """Create the master pod for a job, or dump its YAML instead.

    The container arguments are joined into a single shell command that
    runs ``python -m elasticdl.python.master.main`` and is passed to the
    master container as ``["-c", <command>]``.

    Args:
        image_name: Image name handed to the k8s client.
        client_args: Parsed client-side arguments (namespace, job name,
            master resources, ``yaml``, ``log_file_path``, ...).
        container_args: Arguments forwarded to the master process.
    """
    k8s_client = k8s.Client(
        image_name=image_name,
        namespace=client_args.namespace,
        job_name=client_args.job_name,
        cluster_spec=client_args.cluster_spec,
        cluster_spec_json=client_args.cluster_spec_json,
        force_use_kube_config_file=client_args.force_use_kube_config_file,
    )

    # Assemble the shell command line: prefix + wrapped args [+ redirection].
    command_parts = wrap_python_args_with_string(container_args)
    command_parts.insert(
        0,
        BashCommandTemplate.SET_PIPEFAIL
        + " python -m elasticdl.python.master.main",
    )
    if client_args.log_file_path:
        redirection = BashCommandTemplate.REDIRECTION.format(
            client_args.log_file_path
        )
        command_parts.append(redirection)
    shell_args = ["-c", " ".join(command_parts)]

    # Keyword arguments shared by both the "dump YAML" and "create" paths.
    master_kwargs = dict(
        resource_requests=client_args.master_resource_request,
        resource_limits=client_args.master_resource_limit,
        args=shell_args,
        pod_priority=client_args.master_pod_priority,
        image_pull_policy=client_args.image_pull_policy,
        restart_policy=client_args.restart_policy,
        volume=client_args.volume,
        envs=parse_envs(client_args.envs),
    )

    if not client_args.yaml:
        k8s_client.create_master(**master_kwargs)
        logger.info(
            "ElasticDL job %s was successfully submitted. "
            "The master pod is: %s."
            % (client_args.job_name, k8s_client.get_master_pod_name())
        )
        return

    k8s_client.dump_master_yaml(yaml=client_args.yaml, **master_kwargs)
    logger.info(
        "ElasticDL job %s YAML has been dumped into file %s."
        % (client_args.job_name, client_args.yaml)
    )
Esempio n. 3
0
def build_zoo(args):
    """Build the model zoo image through the Docker API.

    Every entry produced by the build stream is handed to
    ``_print_docker_progress``.

    Args:
        args: Parsed arguments; ``args.path`` is passed as the build path
            and ``args.image`` as the image tag.
    """
    logger.info("Build the image for the model zoo.")

    # NOTE(review): the image name is passed through as-is; no validation
    # of its schema happens in this function.
    docker_client = docker.DockerClient.from_env()
    build_stream = docker_client.api.build(
        dockerfile="./Dockerfile",
        path=args.path,
        rm=True,
        tag=args.image,
        decode=True,
    )
    for progress in build_stream:
        _print_docker_progress(progress)
Esempio n. 4
0
    def __init__(
        self,
        *,
        image_name,
        namespace,
        job_name,
        cluster_spec="",
        force_use_kube_config_file=False
    ):
        """
        Set up the ElasticDL k8s client.

        Args:
            image_name: Docker image path for ElasticDL pod.
            namespace: The name of the Kubernetes namespace where ElasticDL
                pods will be created.
            job_name: ElasticDL job name, should be unique in the namespace.
                Used as pod name prefix and value for "elasticdl" label.
            cluster_spec: Optional value passed to ``load_module``; when
                given, the loaded module's ``cluster`` attribute is kept
                in ``self.cluster``. Otherwise ``self.cluster`` is None.
            force_use_kube_config_file: If true, force to load the cluster
                config from ~/.kube/config. Otherwise, if it's in a process
                running in a K8S environment, it loads the incluster config,
                if not, it loads the kube config file.

        Raises:
            Exception: If the Kubernetes configuration cannot be loaded.
        """
        try:
            # KUBERNETES_SERVICE_HOST being set is taken as "running inside
            # a k8s cluster"; the flag lets the caller override that.
            if (
                force_use_kube_config_file
                or not os.getenv("KUBERNETES_SERVICE_HOST")
            ):
                config.load_kube_config()
                logger.info("Load the kube config file.")
            else:
                config.load_incluster_config()
                logger.info("Load the incluster config.")
        except Exception as ex:
            traceback.print_exc()
            raise Exception(
                "Failed to load configuration for Kubernetes:\n%s" % str(ex)
            )

        self.client = client.CoreV1Api()
        self.namespace = namespace
        self.job_name = job_name
        self._image_name = image_name
        if cluster_spec:
            self.cluster = load_module(cluster_spec).cluster
        else:
            self.cluster = None
Esempio n. 5
0
def init_zoo(args):
    """Generate ``./Dockerfile`` for the model zoo image.

    When ``args.cluster_spec`` is set, the spec file is copied into the
    current working directory and a ``COPY`` instruction placing it under
    ``/cluster_spec/`` is rendered into the Dockerfile.

    Args:
        args: Parsed arguments. The attributes read here are
            ``cluster_spec``, ``base_image`` and ``extra_pypi_index``.

    Raises:
        RuntimeError: If ``args.cluster_spec`` is set but the path does not
            exist.
    """
    logger.info("Create the Dockerfile for the model zoo.")

    # Copy cluster spec file to the current directory if specified
    cluster_spec_path = args.cluster_spec
    cluster_spec_name = None
    if cluster_spec_path:
        if not os.path.exists(cluster_spec_path):
            raise RuntimeError(
                "The cluster spec {} doesn't exist".format(cluster_spec_path)
            )
        try:
            shutil.copy2(cluster_spec_path, os.getcwd())
        except shutil.SameFileError:
            # The spec file is already in the current directory, so there
            # is nothing to copy; it must not abort the command.
            pass
        cluster_spec_name = os.path.basename(cluster_spec_path)

    # Create the docker file
    # Build the content from the template and arguments.
    # NOTE(review): the ENV PATH line hard-codes a python3.6 dist-packages
    # location; confirm it matches the base image in use.
    tmpl_str = """\
FROM {{ BASE_IMAGE }} as base

RUN pip install elasticdl_preprocessing\
 --extra-index-url={{ EXTRA_PYPI_INDEX }}

RUN pip install elasticdl --extra-index-url={{ EXTRA_PYPI_INDEX }}
ENV PATH /usr/local/lib/python3.6/dist-packages/elasticdl/go/bin:$PATH

COPY . /model_zoo
RUN pip install -r /model_zoo/requirements.txt\
 --extra-index-url={{ EXTRA_PYPI_INDEX }}

{% if CLUSTER_SPEC_NAME  %}\
COPY ./{{ CLUSTER_SPEC_NAME }} /cluster_spec/{{ CLUSTER_SPEC_NAME }}\
{% endif %}
"""
    template = Template(tmpl_str)
    docker_file_content = template.render(
        BASE_IMAGE=args.base_image,
        EXTRA_PYPI_INDEX=args.extra_pypi_index,
        CLUSTER_SPEC_NAME=cluster_spec_name,
    )

    with open("./Dockerfile", mode="w") as f:
        f.write(docker_file_content)
Esempio n. 6
0
 def delete_master(self):
     """Log the master pod's name, then delete that pod."""
     log_message = "pod name is %s" % self.get_master_pod_name()
     logger.info(log_message)
     # The name is looked up again for the actual deletion.
     self.delete_pod(self.get_master_pod_name())
Esempio n. 7
0
 def create_master(self, **kargs):
     """Build the master pod object and create it in ``self.namespace``.

     Args:
         **kargs: Forwarded unchanged to ``_create_master_pod_obj``.
     """
     master_pod = self._create_master_pod_obj(**kargs)
     self.client.create_namespaced_pod(self.namespace, master_pod)
     logger.info("Master launched.")
Esempio n. 8
0
def push_zoo(args):
    """Push the model zoo image to its remote registry via the Docker API.

    Every entry produced by the push stream is handed to
    ``_print_docker_progress``.

    Args:
        args: Parsed arguments; ``args.image`` names the image to push.
    """
    logger.info("Push the image for the model zoo.")

    docker_client = docker.DockerClient.from_env()
    push_stream = docker_client.api.push(args.image, stream=True, decode=True)
    for progress in push_stream:
        _print_docker_progress(progress)