Ejemplo n.º 1
0
    def __init__(self,
                 repo_path,
                 from_image,
                 image_name,
                 image_tag,
                 copy_code=True,
                 in_tmp_repo=True,
                 build_steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        # This will help create a unique tmp folder for dockerizer in case of concurrent jobs
        self.uuid = uuid.uuid4().hex
        self.from_image = from_image
        self.image_name = image_name
        self.image_tag = image_tag
        self.repo_path = repo_path
        self.folder_name = repo_path.split('/')[-1]
        self.copy_code = copy_code
        self.in_tmp_repo = in_tmp_repo
        if in_tmp_repo and copy_code:
            self.build_repo_path = self.create_tmp_repo()
        else:
            self.build_repo_path = self.repo_path

        self.build_path = '/'.join(self.build_repo_path.split('/')[:-1])
        self.build_steps = get_list(build_steps)
        self.env_vars = get_list(env_vars)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()
        self.docker = APIClient(version='auto')
        self.registry_host = None
        self.docker_url = None
Ejemplo n.º 2
0
    def __init__(self,
                 repo_path,
                 from_image,
                 image_name,
                 image_tag,
                 copy_code=True,
                 in_tmp_repo=True,
                 build_steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        # This will help create a unique tmp folder for dockerizer in case of concurrent jobs
        self.uuid = uuid.uuid4().hex
        self.from_image = from_image
        self.image_name = image_name
        self.image_tag = image_tag
        self.repo_path = repo_path
        self.folder_name = repo_path.split('/')[-1]
        self.copy_code = copy_code
        self.in_tmp_repo = in_tmp_repo
        if in_tmp_repo and copy_code:
            self.build_repo_path = self.create_tmp_repo()
        else:
            self.build_repo_path = self.repo_path

        self.build_path = '/'.join(self.build_repo_path.split('/')[:-1])
        self.build_steps = get_list(build_steps)
        self.env_vars = get_list(env_vars)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()
        self.docker = APIClient(version='auto')
        self.registry_host = None
        self.docker_url = None
Ejemplo n.º 3
0
    def get_task_pod_spec(self,
                          volume_mounts,
                          volumes,
                          persistence_outputs=None,
                          persistence_data=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None,
                          node_selector=None,
                          affinity=None,
                          tolerations=None,
                          restart_policy='OnFailure'):
        """Pod spec to be used to create pods for tasks: master, worker, ps."""
        volume_mounts = get_list(volume_mounts)
        volumes = get_list(volumes)

        gpu_volume_mounts, gpu_volumes = get_gpu_volumes_def(resources)
        volume_mounts += gpu_volume_mounts
        volumes += gpu_volumes

        pod_container = self.get_pod_container(volume_mounts=volume_mounts,
                                               persistence_outputs=persistence_outputs,
                                               persistence_data=persistence_data,
                                               outputs_refs_jobs=outputs_refs_jobs,
                                               outputs_refs_experiments=outputs_refs_experiments,
                                               env_vars=env_vars,
                                               command=command,
                                               args=args,
                                               resources=resources)

        containers = [pod_container]
        if self.use_sidecar:
            sidecar_container = self.get_sidecar_container()
            containers.append(sidecar_container)

        node_selector = get_node_selector(
            node_selector=node_selector,
            default_node_selector=settings.NODE_SELECTOR_JOBS)
        affinity = get_affinity(
            affinity=affinity,
            default_affinity=settings.AFFINITY_JOBS)
        tolerations = get_tolerations(
            tolerations=tolerations,
            default_tolerations=settings.TOLERATIONS_JOBS)

        service_account_name = None
        if settings.K8S_RBAC_ENABLED:
            service_account_name = settings.K8S_SERVICE_ACCOUNT_NAME
        return client.V1PodSpec(
            restart_policy=restart_policy,
            service_account_name=service_account_name,
            init_containers=to_list(self.get_init_container(persistence_outputs)),
            containers=containers,
            volumes=volumes,
            node_selector=node_selector,
            affinity=affinity,
            tolerations=tolerations)
Ejemplo n.º 4
0
def get_project_pod_spec(volume_mounts,
                         volumes,
                         image,
                         container_name=None,
                         command=None,
                         args=None,
                         ports=None,
                         resources=None,
                         env_vars=None,
                         restart_policy=None):
    """Pod spec to be used to create pods for project side: tensorboard, notebooks."""
    volume_mounts = get_list(volume_mounts)
    volumes = get_list(volumes)

    gpu_volume_mounts, gpu_volumes = pods.get_gpu_volumes_def(resources)
    volume_mounts += gpu_volume_mounts
    volumes += gpu_volumes

    ports = [client.V1ContainerPort(container_port=port) for port in ports]

    containers = [
        client.V1Container(name=container_name,
                           image=image,
                           command=command,
                           args=args,
                           ports=ports,
                           env=env_vars,
                           resources=pods.get_resources(resources),
                           volume_mounts=volume_mounts)
    ]
    return client.V1PodSpec(restart_policy=restart_policy,
                            containers=containers,
                            volumes=volumes)
Ejemplo n.º 5
0
    def __init__(self,
                 build_job,
                 repo_path,
                 from_image,
                 copy_code=True,
                 build_steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        self.build_job = build_job
        self.job_uuid = build_job.uuid.hex
        self.job_name = build_job.unique_name
        self.from_image = from_image
        self.image_name = get_image_name(self.build_job)
        self.image_tag = self.job_uuid
        self.folder_name = repo_path.split('/')[-1]
        self.repo_path = repo_path
        self.copy_code = copy_code

        self.build_path = '/'.join(self.repo_path.split('/')[:-1])
        self.build_steps = get_list(build_steps)
        self.env_vars = get_list(env_vars)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()
        self.docker = APIClient(version='auto')
        self.registry_host = None
        self.docker_url = None
Ejemplo n.º 6
0
    def __init__(self,
                 repo_path,
                 from_image,
                 image_name,
                 image_tag,
                 copy_code=True,
                 in_tmp_repo=True,
                 steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        self.from_image = from_image
        self.image_name = image_name
        self.image_tag = image_tag
        self.repo_path = repo_path
        self.folder_name = repo_path.split('/')[-1]
        self.copy_code = copy_code
        self.in_tmp_repo = in_tmp_repo
        if in_tmp_repo and copy_code:
            self.build_repo_path = self.create_tmp_repo()
        else:
            self.build_repo_path = self.repo_path

        self.build_path = '/'.join(self.build_repo_path.split('/')[:-1])
        self.steps = get_list(steps)
        self.env_vars = get_list(env_vars)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()
        self.docker = None
Ejemplo n.º 7
0
    def __init__(self,
                 build_job,
                 repo_path,
                 from_image,
                 copy_code=True,
                 build_steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        self.build_job = build_job
        self.job_uuid = build_job.uuid.hex
        self.job_name = build_job.unique_name
        self.from_image = from_image
        self.image_name = get_image_name(self.build_job)
        self.image_tag = self.job_uuid
        self.folder_name = repo_path.split('/')[-1]
        self.repo_path = repo_path
        self.copy_code = copy_code

        self.build_path = '/'.join(self.repo_path.split('/')[:-1])
        self.build_steps = get_list(build_steps)
        self.env_vars = get_list(env_vars)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()
        self.docker = APIClient(version='auto')
        self.registry_host = None
        self.docker_url = None
Ejemplo n.º 8
0
    def get_task_pod_spec(self,
                          task_type,
                          task_idx,
                          volume_mounts,
                          volumes,
                          env_vars=None,
                          command=None,
                          args=None,
                          sidecar_args=None,
                          resources=None,
                          node_selector=None,
                          restart_policy='OnFailure'):
        """Pod spec to be used to create pods for tasks: master, worker, ps."""
        volume_mounts = get_list(volume_mounts)
        volumes = get_list(volumes)

        gpu_volume_mounts, gpu_volumes = get_gpu_volumes_def(resources)
        volume_mounts += gpu_volume_mounts
        volumes += gpu_volumes

        # Add job information
        env_vars = get_list(env_vars)
        env_vars.append(
            client.V1EnvVar(name=constants.CONFIG_MAP_TASK_INFO_KEY_NAME,
                            value=json.dumps({
                                'type': task_type,
                                'index': task_idx
                            })))

        pod_container = self.get_pod_container(volume_mounts=volume_mounts,
                                               env_vars=env_vars,
                                               command=command,
                                               args=args,
                                               resources=resources)

        containers = [pod_container]
        if self.use_sidecar:
            sidecar_container = self.get_sidecar_container(task_type=task_type,
                                                           task_idx=task_idx,
                                                           args=sidecar_args)
            containers.append(sidecar_container)

        if not node_selector:
            node_selector = settings.NODE_SELECTORS_EXPERIMENTS
            node_selector = json.loads(
                node_selector) if node_selector else None
        service_account_name = None
        if settings.K8S_RBAC_ENABLED:
            service_account_name = settings.K8S_SERVICE_ACCOUNT_NAME
        return client.V1PodSpec(restart_policy=restart_policy,
                                service_account_name=service_account_name,
                                init_containers=to_list(
                                    self.get_init_container()),
                                containers=containers,
                                volumes=volumes,
                                node_selector=node_selector)
Ejemplo n.º 9
0
    def get_task_pod_spec(self,
                          task_type,
                          task_idx,
                          volume_mounts,
                          volumes,
                          env_vars=None,
                          command=None,
                          args=None,
                          sidecar_args=None,
                          resources=None,
                          node_selector=None,
                          restart_policy='OnFailure'):
        """Pod spec to be used to create pods for tasks: master, worker, ps."""
        volume_mounts = get_list(volume_mounts)
        volumes = get_list(volumes)

        gpu_volume_mounts, gpu_volumes = get_gpu_volumes_def(resources)
        volume_mounts += gpu_volume_mounts
        volumes += gpu_volumes

        # Add job information
        env_vars = get_list(env_vars)
        env_vars.append(
            client.V1EnvVar(
                name=constants.CONFIG_MAP_TASK_INFO_KEY_NAME,
                value=json.dumps({'type': task_type, 'index': task_idx})
            )
        )

        pod_container = self.get_pod_container(volume_mounts=volume_mounts,
                                               env_vars=env_vars,
                                               command=command,
                                               args=args,
                                               resources=resources)

        containers = [pod_container]
        if self.use_sidecar:
            sidecar_container = self.get_sidecar_container(task_type=task_type,
                                                           task_idx=task_idx,
                                                           args=sidecar_args)
            containers.append(sidecar_container)

        if not node_selector:
            node_selector = settings.NODE_SELECTORS_EXPERIMENTS
            node_selector = json.loads(node_selector) if node_selector else None
        service_account_name = None
        if settings.K8S_RBAC_ENABLED:
            service_account_name = settings.K8S_SERVICE_ACCOUNT_NAME
        return client.V1PodSpec(restart_policy=restart_policy,
                                service_account_name=service_account_name,
                                init_containers=to_list(self.get_init_container()),
                                containers=containers,
                                volumes=volumes,
                                node_selector=node_selector)
Ejemplo n.º 10
0
def get_project_pod_spec(volume_mounts,
                         volumes,
                         image,
                         command,
                         args,
                         ports,
                         env_vars=None,
                         env_from=None,
                         container_name=None,
                         resources=None,
                         node_selector=None,
                         affinity=None,
                         tolerations=None,
                         image_pull_policy=None,
                         restart_policy=None,
                         use_service_account=False):
    """Pod spec to be used to create pods for project: tensorboard, notebooks."""
    env_vars = get_list(env_vars)
    volume_mounts = get_list(volume_mounts)
    volumes = get_list(volumes)

    gpu_volume_mounts, gpu_volumes = get_gpu_volumes_def(resources)
    volume_mounts += gpu_volume_mounts
    volumes += gpu_volumes

    ports = [client.V1ContainerPort(container_port=port) for port in ports]
    env_vars += get_resources_env_vars(resources=resources)

    containers = [
        client.V1Container(name=container_name,
                           image=image,
                           image_pull_policy=image_pull_policy,
                           command=command,
                           args=args,
                           ports=ports,
                           env=env_vars,
                           env_from=env_from,
                           resources=get_resources(resources),
                           volume_mounts=volume_mounts)
    ]

    service_account_name = None
    if use_service_account and settings.K8S_RBAC_ENABLED:
        service_account_name = settings.K8S_SERVICE_ACCOUNT_NAME

    return client.V1PodSpec(restart_policy=restart_policy,
                            service_account_name=service_account_name,
                            containers=containers,
                            volumes=volumes,
                            node_selector=node_selector,
                            affinity=affinity,
                            tolerations=tolerations)
Ejemplo n.º 11
0
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None):
        """Pod job container for task."""
        env_vars = get_list(env_vars)
        env_vars += get_job_env_vars(
            log_level=self.log_level,
            outputs_path=get_job_outputs_path(job_name=self.job_name),
            logs_path=get_job_logs_path(job_name=self.job_name),
            data_path=get_job_data_path(job_name=self.job_name),
            project_data_path=get_project_data_path(project_name=self.project_name)
        )
        env_vars += [
            get_env_var(name=constants.CONFIG_MAP_JOB_INFO_KEY_NAME, value=json.dumps(self.labels)),
        ]

        if resources:
            env_vars += get_resources_env_vars(resources=resources)

        ports = [client.V1ContainerPort(container_port=port) for port in self.ports]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=ports,
                                  env=env_vars,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)
Ejemplo n.º 12
0
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None):
        """Pod job container for task."""
        env_vars = get_list(env_vars)
        env_vars += get_job_env_vars(
            log_level=self.log_level,
            outputs_path=get_job_outputs_path(job_name=self.job_name),
            logs_path=get_job_logs_path(job_name=self.job_name),
            data_path=get_job_data_path(job_name=self.job_name),
            project_data_path=get_project_data_path(project_name=self.project_name)
        )
        env_vars += [
            get_env_var(name=constants.CONFIG_MAP_JOB_INFO_KEY_NAME, value=json.dumps(self.labels)),
        ]

        if resources:
            env_vars += get_resources_env_vars(resources=resources)

        ports = [client.V1ContainerPort(container_port=port) for port in self.ports]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=ports,
                                  env=env_vars,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)
Ejemplo n.º 13
0
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          persistence_outputs=None,
                          persistence_data=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          secret_refs=None,
                          configmap_refs=None,
                          resources=None,
                          ephemeral_token=None):
        """Pod job container for task."""
        assert self.cluster_def is not None

        # Env vars preparations
        env_vars = get_list(env_vars)
        outputs_path = get_experiment_outputs_path(
            persistence_outputs=persistence_outputs,
            experiment_name=self.experiment_name,
            original_name=self.original_name,
            cloning_strategy=self.cloning_strategy)
        env_vars += get_job_env_vars(
            persistence_outputs=persistence_outputs,
            outputs_path=outputs_path,
            persistence_data=persistence_data,
            log_level=self.log_level,
            logs_path=get_experiment_logs_path(self.experiment_name),
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments,
            ephemeral_token=ephemeral_token,
        )
        env_vars += [
            get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                        value=json.dumps(self.cluster_def)),
            get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                        value=self.declarations),
            get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                        value=json.dumps(self.experiment_labels)),
        ]
        env_vars += get_resources_env_vars(resources=resources)

        # Env from configmap and secret refs
        env_from = get_pod_env_from(secret_refs=secret_refs,
                                    configmap_refs=configmap_refs)

        ports = [
            client.V1ContainerPort(container_port=port) for port in self.ports
        ]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=ports,
                                  env=env_vars,
                                  env_from=env_from,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)
Ejemplo n.º 14
0
    def get_task_pod_spec(self,
                          volume_mounts,
                          volumes,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None,
                          node_selector=None,
                          restart_policy='OnFailure'):
        """Pod spec to be used to create pods for tasks: master, worker, ps."""
        volume_mounts = get_list(volume_mounts)
        volumes = get_list(volumes)

        gpu_volume_mounts, gpu_volumes = get_gpu_volumes_def(resources)
        volume_mounts += gpu_volume_mounts
        volumes += gpu_volumes

        pod_container = self.get_pod_container(volume_mounts=volume_mounts,
                                               env_vars=env_vars,
                                               command=command,
                                               args=args,
                                               resources=resources)

        containers = [pod_container]
        if self.use_sidecar:
            sidecar_container = self.get_sidecar_container()
            containers.append(sidecar_container)

        if not node_selector:
            node_selector = settings.NODE_SELECTORS_EXPERIMENTS
            node_selector = json.loads(
                node_selector) if node_selector else None
        service_account_name = None
        if settings.K8S_RBAC_ENABLED:
            service_account_name = settings.K8S_SERVICE_ACCOUNT_NAME
        return client.V1PodSpec(restart_policy=restart_policy,
                                service_account_name=service_account_name,
                                init_containers=to_list(
                                    self.get_init_container()),
                                containers=containers,
                                volumes=volumes,
                                node_selector=node_selector)
Ejemplo n.º 15
0
    def get_task_pod_spec(self,
                          volume_mounts,
                          volumes,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None,
                          node_selector=None,
                          restart_policy='OnFailure'):
        """Pod spec to be used to create pods for tasks: master, worker, ps."""
        volume_mounts = get_list(volume_mounts)
        volumes = get_list(volumes)

        gpu_volume_mounts, gpu_volumes = get_gpu_volumes_def(resources)
        volume_mounts += gpu_volume_mounts
        volumes += gpu_volumes

        pod_container = self.get_pod_container(volume_mounts=volume_mounts,
                                               env_vars=env_vars,
                                               command=command,
                                               args=args,
                                               resources=resources)

        containers = [pod_container]
        if self.use_sidecar:
            sidecar_container = self.get_sidecar_container()
            containers.append(sidecar_container)

        if not node_selector:
            node_selector = settings.NODE_SELECTORS_EXPERIMENTS
            node_selector = json.loads(node_selector) if node_selector else None
        service_account_name = None
        if settings.K8S_RBAC_ENABLED:
            service_account_name = settings.K8S_SERVICE_ACCOUNT_NAME
        return client.V1PodSpec(restart_policy=restart_policy,
                                service_account_name=service_account_name,
                                init_containers=to_list(self.get_init_container()),
                                containers=containers,
                                volumes=volumes,
                                node_selector=node_selector)
Ejemplo n.º 16
0
def get_project_pod_spec(volume_mounts,
                         volumes,
                         image,
                         command,
                         args,
                         ports,
                         env_vars=None,
                         container_name=None,
                         resources=None,
                         node_selector=None,
                         restart_policy=None,
                         use_service_account=False):
    """Pod spec to be used to create pods for project: tensorboard, notebooks."""
    volume_mounts = get_list(volume_mounts)
    volumes = get_list(volumes)

    gpu_volume_mounts, gpu_volumes = get_gpu_volumes_def(resources)
    volume_mounts += gpu_volume_mounts
    volumes += gpu_volumes

    ports = [client.V1ContainerPort(container_port=port) for port in ports]

    containers = [client.V1Container(name=container_name,
                                     image=image,
                                     command=command,
                                     args=args,
                                     ports=ports,
                                     env=env_vars,
                                     resources=get_resources(resources),
                                     volume_mounts=volume_mounts)]

    service_account_name = None
    if use_service_account and settings.K8S_RBAC_ENABLED:
        service_account_name = settings.K8S_SERVICE_ACCOUNT_NAME

    return client.V1PodSpec(restart_policy=restart_policy,
                            service_account_name=service_account_name,
                            containers=containers,
                            volumes=volumes,
                            node_selector=node_selector)
Ejemplo n.º 17
0
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None):
        """Pod job container for task."""
        env_vars = get_list(env_vars)
        env_vars += [
            self.get_from_experiment_config_map(
                constants.CONFIG_MAP_CLUSTER_KEY_NAME),
            self.get_from_experiment_config_map(
                constants.CONFIG_MAP_DECLARATIONS_KEY_NAME),
            self.get_from_experiment_config_map(
                constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME),
            self.get_from_experiment_config_map(
                constants.CONFIG_MAP_LOG_LEVEL_KEY_NAME),
            self.get_from_experiment_config_map(API_KEY_NAME),
            self.get_from_experiment_config_map(
                constants.CONFIG_MAP_EXPERIMENT_OUTPUTS_PATH_KEY_NAME),
            self.get_from_experiment_config_map(
                constants.CONFIG_MAP_EXPERIMENT_LOGS_PATH_KEY_NAME),
            self.get_from_experiment_config_map(
                constants.CONFIG_MAP_EXPERIMENT_DATA_PATH_KEY_NAME),
            get_from_app_secret('POLYAXON_SECRET_KEY', 'polyaxon-secret'),
            get_from_app_secret('POLYAXON_INTERNAL_SECRET_TOKEN',
                                'polyaxon-internal-secret-token')
        ]

        if resources:
            if resources.gpu and settings.LD_LIBRARY_PATH:
                env_vars.append(
                    client.V1EnvVar(name='LD_LIBRARY_PATH',
                                    value=settings.LD_LIBRARY_PATH))
            if resources.gpu and not settings.LD_LIBRARY_PATH:
                logger.warning(
                    '`LD_LIBRARY_PATH` was not properly set.')  # Publish error

        ports = [
            client.V1ContainerPort(container_port=port) for port in self.ports
        ]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=ports,
                                  env=env_vars,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)
Ejemplo n.º 18
0
    def get_pod_container(self,
                          volume_mounts,
                          persistence_outputs,
                          persistence_data,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          secret_refs=None,
                          configmap_refs=None,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None):
        """Pod job container for task."""
        # Env vars preparation
        env_vars = get_list(env_vars)
        env_vars += get_job_env_vars(
            log_level=self.log_level,
            persistence_outputs=persistence_outputs,
            outputs_path=get_job_outputs_path(
                persistence_outputs=persistence_outputs,
                job_name=self.job_name),
            persistence_data=persistence_data,
            logs_path=get_job_logs_path(job_name=self.job_name),
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments)
        env_vars += [
            get_env_var(name=constants.CONFIG_MAP_JOB_INFO_KEY_NAME,
                        value=json.dumps(self.labels)),
        ]

        env_vars += get_resources_env_vars(resources=resources)

        # Env from configmap and secret refs
        env_from = get_pod_env_from(secret_refs=secret_refs,
                                    configmap_refs=configmap_refs)

        ports = [
            client.V1ContainerPort(container_port=port) for port in self.ports
        ]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=ports or None,
                                  env=env_vars,
                                  env_from=env_from,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)
Ejemplo n.º 19
0
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None):
        """Pod job container for task."""
        assert self.cluster_def is not None

        env_vars = get_list(env_vars)
        outputs_path = get_experiment_outputs_path(
            experiment_name=self.experiment_name,
            original_name=self.original_name,
            cloning_strategy=self.cloning_strategy)
        env_vars += get_job_env_vars(
            log_level=self.log_level,
            outputs_path=outputs_path,
            logs_path=get_experiment_logs_path(self.experiment_name),
            data_path=get_experiment_data_path(self.experiment_name),
            project_data_path=get_project_data_path(
                project_name=self.project_name))
        env_vars += [
            get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                        value=json.dumps(self.cluster_def)),
            get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                        value=self.declarations),
            get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                        value=json.dumps(self.experiment_labels)),
        ]

        if resources:
            env_vars += get_resources_env_vars(resources=resources)

        ports = [
            client.V1ContainerPort(container_port=port) for port in self.ports
        ]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=ports,
                                  env=env_vars,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)
Ejemplo n.º 20
0
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None):
        """Pod job container for task."""
        assert self.cluster_def is not None

        env_vars = get_list(env_vars)
        outputs_path = get_experiment_outputs_path(
            experiment_name=self.experiment_name,
            original_name=self.original_name,
            cloning_strategy=self.cloning_strategy)
        env_vars += get_job_env_vars(
            log_level=self.log_level,
            outputs_path=outputs_path,
            logs_path=get_experiment_logs_path(self.experiment_name),
            data_path=get_experiment_data_path(self.experiment_name),
            project_data_path=get_project_data_path(project_name=self.project_name)
        )
        env_vars += [
            get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                        value=json.dumps(self.cluster_def)),
            get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                        value=self.declarations),
            get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                        value=json.dumps(self.experiment_labels)),
        ]

        if resources:
            env_vars += get_resources_env_vars(resources=resources)

        ports = [client.V1ContainerPort(container_port=port) for port in self.ports]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=ports,
                                  env=env_vars,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)
Ejemplo n.º 21
0
    def get_task_pod_spec(self,
                          task_type,
                          task_idx,
                          volume_mounts,
                          volumes,
                          env_vars=None,
                          command=None,
                          args=None,
                          sidecar_args=None,
                          persistence_outputs=None,
                          persistence_data=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          resources=None,
                          node_selector=None,
                          affinity=None,
                          tolerations=None,
                          restart_policy='OnFailure'):
        """Pod spec to be used to create pods for tasks: master, worker, ps."""
        volume_mounts = get_list(volume_mounts)
        volumes = get_list(volumes)

        gpu_volume_mounts, gpu_volumes = get_gpu_volumes_def(resources)
        volume_mounts += gpu_volume_mounts
        volumes += gpu_volumes

        # Add job information
        env_vars = get_list(env_vars)
        env_vars.append(
            client.V1EnvVar(
                name=constants.CONFIG_MAP_TASK_INFO_KEY_NAME,
                value=json.dumps({'type': task_type, 'index': task_idx})
            )
        )

        pod_container = self.get_pod_container(volume_mounts=volume_mounts,
                                               env_vars=env_vars,
                                               command=command,
                                               args=args,
                                               persistence_outputs=persistence_outputs,
                                               persistence_data=persistence_data,
                                               outputs_refs_jobs=outputs_refs_jobs,
                                               outputs_refs_experiments=outputs_refs_experiments,
                                               resources=resources)

        containers = [pod_container]
        if self.use_sidecar:
            sidecar_container = self.get_sidecar_container(task_type=task_type,
                                                           task_idx=task_idx,
                                                           args=sidecar_args)
            containers.append(sidecar_container)

        node_selector = get_node_selector(
            node_selector=node_selector,
            default_node_selector=settings.NODE_SELECTOR_EXPERIMENTS)
        affinity = get_affinity(
            affinity=affinity,
            default_affinity=settings.AFFINITY_EXPERIMENTS)
        tolerations = get_tolerations(
            tolerations=tolerations,
            default_tolerations=settings.TOLERATIONS_EXPERIMENTS)
        service_account_name = None
        if settings.K8S_RBAC_ENABLED:
            service_account_name = settings.K8S_SERVICE_ACCOUNT_NAME
        return client.V1PodSpec(
            restart_policy=restart_policy,
            service_account_name=service_account_name,
            init_containers=to_list(self.get_init_container(persistence_outputs)),
            containers=containers,
            volumes=volumes,
            node_selector=node_selector,
            tolerations=tolerations,
            affinity=affinity)