def __init__(self,
             repo_path,
             from_image,
             image_name,
             image_tag,
             copy_code=True,
             in_tmp_repo=True,
             build_steps=None,
             env_vars=None,
             dockerfile_name='Dockerfile'):
    """Record the build configuration and resolve the paths the build uses.

    Args:
        repo_path: '/'-separated path of the code repository; the text after
            its last '/' becomes `folder_name`.
        from_image: stored unchanged as `self.from_image`.
        image_name: stored unchanged as `self.image_name`.
        image_tag: stored unchanged as `self.image_tag`.
        copy_code: stored unchanged; also gates the temporary repo (below).
        in_tmp_repo: when both this and `copy_code` are truthy, the build
            runs from the path returned by `create_tmp_repo()`; otherwise
            it runs from `repo_path` directly.
        build_steps: normalised through `get_list`.
        env_vars: normalised through `get_list`.
        dockerfile_name: file name joined onto the build path to form
            `dockerfile_path`.
    """
    # Per-instance id; meant to keep the dockerizer tmp folder unique when
    # several jobs run concurrently.
    self.uuid = uuid.uuid4().hex

    # Image coordinates.
    self.from_image = from_image
    self.image_name = image_name
    self.image_tag = image_tag

    # Repository location; these are set before `create_tmp_repo()` runs,
    # exactly as before, since that helper may read them.
    self.repo_path = repo_path
    self.folder_name = repo_path.rpartition('/')[2]
    self.copy_code = copy_code
    self.in_tmp_repo = in_tmp_repo
    self.build_repo_path = (
        self.create_tmp_repo() if in_tmp_repo and copy_code else self.repo_path
    )

    # The build path is the parent of the repo being built: everything
    # before the last '/', or '' when the path contains no '/'.
    self.build_path = self.build_repo_path.rpartition('/')[0]

    self.build_steps = get_list(build_steps)
    self.env_vars = get_list(env_vars)
    self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
    self.polyaxon_requirements_path = self._get_requirements_path()
    self.polyaxon_setup_path = self._get_setup_path()

    self.docker = APIClient(version='auto')
    # Not known at construction time; left unset here.
    self.registry_host = self.docker_url = None
def get_task_pod_spec(self,
                      volume_mounts,
                      volumes,
                      persistence_outputs=None,
                      persistence_data=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      env_vars=None,
                      command=None,
                      args=None,
                      resources=None,
                      node_selector=None,
                      affinity=None,
                      tolerations=None,
                      restart_policy='OnFailure'):
    """Assemble the `V1PodSpec` for a task pod (master, worker, ps).

    The main container comes from `get_pod_container`; a sidecar container
    is appended when `self.use_sidecar` is truthy. Node selector, affinity
    and tolerations are resolved against the `*_JOBS` defaults in settings.
    A service account name is only set when `settings.K8S_RBAC_ENABLED`.

    Args:
        volume_mounts: normalised with `get_list`, then extended with the
            GPU mounts from `get_gpu_volumes_def(resources)`.
        volumes: normalised with `get_list`, then extended with the GPU
            volumes from `get_gpu_volumes_def(resources)`.
        persistence_outputs: forwarded to the main and init containers.
        persistence_data: forwarded to the main container.
        outputs_refs_jobs: forwarded to the main container.
        outputs_refs_experiments: forwarded to the main container.
        env_vars: forwarded to the main container.
        command: forwarded to the main container.
        args: forwarded to the main container.
        resources: forwarded to the main container and used for GPU volumes.
        node_selector: overrides `settings.NODE_SELECTOR_JOBS`.
        affinity: overrides `settings.AFFINITY_JOBS`.
        tolerations: overrides `settings.TOLERATIONS_JOBS`.
        restart_policy: pod restart policy, passed through unchanged.

    Returns:
        A `client.V1PodSpec`.
    """
    mounts = get_list(volume_mounts)
    pod_volumes = get_list(volumes)
    gpu_mounts, gpu_vols = get_gpu_volumes_def(resources)
    mounts += gpu_mounts
    pod_volumes += gpu_vols

    containers = [
        self.get_pod_container(volume_mounts=mounts,
                               persistence_outputs=persistence_outputs,
                               persistence_data=persistence_data,
                               outputs_refs_jobs=outputs_refs_jobs,
                               outputs_refs_experiments=outputs_refs_experiments,
                               env_vars=env_vars,
                               command=command,
                               args=args,
                               resources=resources)
    ]
    if self.use_sidecar:
        containers.append(self.get_sidecar_container())

    # Explicit values win; the settings supply the fallbacks.
    scheduling = {
        'node_selector': get_node_selector(
            node_selector=node_selector,
            default_node_selector=settings.NODE_SELECTOR_JOBS),
        'affinity': get_affinity(
            affinity=affinity,
            default_affinity=settings.AFFINITY_JOBS),
        'tolerations': get_tolerations(
            tolerations=tolerations,
            default_tolerations=settings.TOLERATIONS_JOBS),
    }

    service_account_name = (
        settings.K8S_SERVICE_ACCOUNT_NAME if settings.K8S_RBAC_ENABLED else None
    )
    init_containers = to_list(self.get_init_container(persistence_outputs))

    return client.V1PodSpec(restart_policy=restart_policy,
                            service_account_name=service_account_name,
                            init_containers=init_containers,
                            containers=containers,
                            volumes=pod_volumes,
                            **scheduling)
def get_project_pod_spec(volume_mounts,
                         volumes,
                         image,
                         container_name=None,
                         command=None,
                         args=None,
                         ports=None,
                         resources=None,
                         env_vars=None,
                         restart_policy=None):
    """Pod spec to be used to create pods for project side: tensorboard, notebooks.

    Args:
        volume_mounts: normalised with `get_list`, then extended with the
            GPU mounts from `pods.get_gpu_volumes_def(resources)`.
        volumes: normalised with `get_list`, then extended with the GPU
            volumes from `pods.get_gpu_volumes_def(resources)`.
        image: image of the single container.
        container_name: name of the single container.
        command: container command, passed through unchanged.
        args: container args, passed through unchanged.
        ports: optional iterable of container port numbers; `None` (the
            default) or an empty iterable yields a container without ports.
        resources: converted with `pods.get_resources` and used for GPU
            volumes.
        env_vars: passed through unchanged as the container `env`.
        restart_policy: pod restart policy, passed through unchanged.

    Returns:
        A `client.V1PodSpec` holding exactly one container.
    """
    volume_mounts = get_list(volume_mounts)
    volumes = get_list(volumes)

    gpu_volume_mounts, gpu_volumes = pods.get_gpu_volumes_def(resources)
    volume_mounts += gpu_volume_mounts
    volumes += gpu_volumes

    # `ports` defaults to None; iterating None would raise TypeError, so
    # treat a missing value as "no ports".
    container_ports = [client.V1ContainerPort(container_port=port)
                       for port in ports or []]

    containers = [
        client.V1Container(name=container_name,
                           image=image,
                           command=command,
                           args=args,
                           ports=container_ports,
                           env=env_vars,
                           resources=pods.get_resources(resources),
                           volume_mounts=volume_mounts)
    ]
    return client.V1PodSpec(restart_policy=restart_policy,
                            containers=containers,
                            volumes=volumes)
def __init__(self,
             build_job,
             repo_path,
             from_image,
             copy_code=True,
             build_steps=None,
             env_vars=None,
             dockerfile_name='Dockerfile'):
    """Record the configuration of a build driven by a build job.

    The image name is derived with `get_image_name(build_job)` and the
    image tag is the job's uuid hex.

    Args:
        build_job: object exposing `uuid` (with a `.hex` attribute) and
            `unique_name`.
        repo_path: '/'-separated path of the code repository; the text after
            its last '/' becomes `folder_name`, the text before it becomes
            `build_path`.
        from_image: stored unchanged as `self.from_image`.
        copy_code: stored unchanged as `self.copy_code`.
        build_steps: normalised through `get_list`.
        env_vars: normalised through `get_list`.
        dockerfile_name: file name joined onto the build path to form
            `dockerfile_path`.
    """
    # Job identity.
    self.build_job = build_job
    self.job_uuid = build_job.uuid.hex
    self.job_name = build_job.unique_name

    # Image coordinates: the tag is the job uuid.
    self.from_image = from_image
    self.image_name = get_image_name(self.build_job)
    self.image_tag = self.job_uuid

    # Repository location.
    self.folder_name = repo_path.rpartition('/')[2]
    self.repo_path = repo_path
    self.copy_code = copy_code
    # Parent of the repo: everything before the last '/', or '' when the
    # path contains no '/'.
    self.build_path = self.repo_path.rpartition('/')[0]

    self.build_steps = get_list(build_steps)
    self.env_vars = get_list(env_vars)
    self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
    self.polyaxon_requirements_path = self._get_requirements_path()
    self.polyaxon_setup_path = self._get_setup_path()

    self.docker = APIClient(version='auto')
    # Not known at construction time; left unset here.
    self.registry_host = self.docker_url = None
def __init__(self,
             repo_path,
             from_image,
             image_name,
             image_tag,
             copy_code=True,
             in_tmp_repo=True,
             steps=None,
             env_vars=None,
             dockerfile_name='Dockerfile'):
    """Record the build configuration and resolve the paths the build uses.

    No Docker client is created here; `self.docker` starts as `None`.

    Args:
        repo_path: '/'-separated path of the code repository; the text after
            its last '/' becomes `folder_name`.
        from_image: stored unchanged as `self.from_image`.
        image_name: stored unchanged as `self.image_name`.
        image_tag: stored unchanged as `self.image_tag`.
        copy_code: stored unchanged; also gates the temporary repo (below).
        in_tmp_repo: when both this and `copy_code` are truthy, the build
            runs from the path returned by `create_tmp_repo()`; otherwise
            it runs from `repo_path` directly.
        steps: normalised through `get_list`.
        env_vars: normalised through `get_list`.
        dockerfile_name: file name joined onto the build path to form
            `dockerfile_path`.
    """
    # Image coordinates.
    self.from_image = from_image
    self.image_name = image_name
    self.image_tag = image_tag

    # Repository location; set before `create_tmp_repo()` runs, exactly as
    # before, since that helper may read these attributes.
    self.repo_path = repo_path
    self.folder_name = repo_path.rpartition('/')[2]
    self.copy_code = copy_code
    self.in_tmp_repo = in_tmp_repo
    self.build_repo_path = (
        self.create_tmp_repo() if in_tmp_repo and copy_code else self.repo_path
    )

    # Parent of the repo being built: everything before the last '/', or ''
    # when the path contains no '/'.
    self.build_path = self.build_repo_path.rpartition('/')[0]

    self.steps = get_list(steps)
    self.env_vars = get_list(env_vars)
    self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
    self.polyaxon_requirements_path = self._get_requirements_path()
    self.polyaxon_setup_path = self._get_setup_path()
    self.docker = None
def get_task_pod_spec(self,
                      task_type,
                      task_idx,
                      volume_mounts,
                      volumes,
                      env_vars=None,
                      command=None,
                      args=None,
                      sidecar_args=None,
                      resources=None,
                      node_selector=None,
                      restart_policy='OnFailure'):
    """Assemble the `V1PodSpec` for one task pod (master, worker, ps).

    The task's type and index are exposed to the main container as a JSON
    env var named `constants.CONFIG_MAP_TASK_INFO_KEY_NAME`.

    Args:
        task_type: task type, written under the 'type' key of the task info.
        task_idx: task index, written under the 'index' key of the task info.
        volume_mounts: normalised with `get_list`, then extended with the
            GPU mounts from `get_gpu_volumes_def(resources)`.
        volumes: normalised with `get_list`, then extended with the GPU
            volumes from `get_gpu_volumes_def(resources)`.
        env_vars: normalised with `get_list`; the task info var is appended.
        command: forwarded to the main container.
        args: forwarded to the main container.
        sidecar_args: forwarded to the sidecar container, if one is used.
        resources: forwarded to the main container and used for GPU volumes.
        node_selector: used as given when truthy; otherwise the JSON string
            in `settings.NODE_SELECTORS_EXPERIMENTS` is decoded, or `None`
            is used when that setting is empty.
        restart_policy: pod restart policy, passed through unchanged.

    Returns:
        A `client.V1PodSpec`.
    """
    mounts = get_list(volume_mounts)
    pod_volumes = get_list(volumes)
    gpu_mounts, gpu_vols = get_gpu_volumes_def(resources)
    mounts += gpu_mounts
    pod_volumes += gpu_vols

    # Tell the container which task of the cluster it is.
    env_vars = get_list(env_vars)
    task_info_var = client.V1EnvVar(
        name=constants.CONFIG_MAP_TASK_INFO_KEY_NAME,
        value=json.dumps({'type': task_type, 'index': task_idx}))
    env_vars.append(task_info_var)

    containers = [
        self.get_pod_container(volume_mounts=mounts,
                               env_vars=env_vars,
                               command=command,
                               args=args,
                               resources=resources)
    ]
    if self.use_sidecar:
        containers.append(
            self.get_sidecar_container(task_type=task_type,
                                       task_idx=task_idx,
                                       args=sidecar_args))

    if not node_selector:
        # Only the settings default is decoded from JSON; an explicit
        # selector is handed to the pod spec untouched.
        default_selector = settings.NODE_SELECTORS_EXPERIMENTS
        node_selector = json.loads(default_selector) if default_selector else None

    service_account_name = (
        settings.K8S_SERVICE_ACCOUNT_NAME if settings.K8S_RBAC_ENABLED else None
    )
    init_containers = to_list(self.get_init_container())

    return client.V1PodSpec(restart_policy=restart_policy,
                            service_account_name=service_account_name,
                            init_containers=init_containers,
                            containers=containers,
                            volumes=pod_volumes,
                            node_selector=node_selector)
def get_task_pod_spec(self,
                      task_type,
                      task_idx,
                      volume_mounts,
                      volumes,
                      env_vars=None,
                      command=None,
                      args=None,
                      sidecar_args=None,
                      resources=None,
                      node_selector=None,
                      restart_policy='OnFailure'):
    """Build the pod spec for a single task pod (master, worker, ps).

    Adds a JSON env var (`constants.CONFIG_MAP_TASK_INFO_KEY_NAME`) carrying
    the task type and index, creates the main container, optionally adds
    the sidecar, and wraps everything in a `client.V1PodSpec`.

    Args:
        task_type: value stored under 'type' in the task info env var.
        task_idx: value stored under 'index' in the task info env var.
        volume_mounts: normalised with `get_list`; GPU mounts are added.
        volumes: normalised with `get_list`; GPU volumes are added.
        env_vars: normalised with `get_list`; the task info var is appended.
        command: main container command.
        args: main container args.
        sidecar_args: args for the sidecar, used only if
            `self.use_sidecar` is truthy.
        resources: forwarded to the main container and to
            `get_gpu_volumes_def`.
        node_selector: used as given when truthy; otherwise the JSON string
            in `settings.NODE_SELECTORS_EXPERIMENTS` is decoded (`None` if
            that setting is empty).
        restart_policy: pod restart policy, passed through unchanged.

    Returns:
        A `client.V1PodSpec`.
    """
    all_mounts = get_list(volume_mounts)
    all_volumes = get_list(volumes)
    extra_mounts, extra_volumes = get_gpu_volumes_def(resources)
    all_mounts += extra_mounts
    all_volumes += extra_volumes

    # Add job information
    env_vars = get_list(env_vars)
    env_vars.append(client.V1EnvVar(
        name=constants.CONFIG_MAP_TASK_INFO_KEY_NAME,
        value=json.dumps({'type': task_type, 'index': task_idx})))

    main_container = self.get_pod_container(volume_mounts=all_mounts,
                                            env_vars=env_vars,
                                            command=command,
                                            args=args,
                                            resources=resources)
    if self.use_sidecar:
        containers = [
            main_container,
            self.get_sidecar_container(task_type=task_type,
                                       task_idx=task_idx,
                                       args=sidecar_args),
        ]
    else:
        containers = [main_container]

    if not node_selector:
        # Fall back to the settings value, which is stored as JSON text.
        raw_selector = settings.NODE_SELECTORS_EXPERIMENTS
        node_selector = json.loads(raw_selector) if raw_selector else None

    spec = {
        'restart_policy': restart_policy,
        'service_account_name': (settings.K8S_SERVICE_ACCOUNT_NAME
                                 if settings.K8S_RBAC_ENABLED else None),
        'init_containers': to_list(self.get_init_container()),
        'containers': containers,
        'volumes': all_volumes,
        'node_selector': node_selector,
    }
    return client.V1PodSpec(**spec)
def get_project_pod_spec(volume_mounts,
                         volumes,
                         image,
                         command,
                         args,
                         ports,
                         env_vars=None,
                         env_from=None,
                         container_name=None,
                         resources=None,
                         node_selector=None,
                         affinity=None,
                         tolerations=None,
                         image_pull_policy=None,
                         restart_policy=None,
                         use_service_account=False):
    """Build the single-container pod spec for project pods (tensorboard, notebooks).

    Args:
        volume_mounts: normalised with `get_list`; GPU mounts are added.
        volumes: normalised with `get_list`; GPU volumes are added.
        image: container image.
        command: container command.
        args: container args.
        ports: iterable of port numbers, each wrapped in a
            `client.V1ContainerPort`.
        env_vars: normalised with `get_list`, then extended with
            `get_resources_env_vars(resources=resources)`.
        env_from: passed through as the container `env_from`.
        container_name: container name.
        resources: converted with `get_resources`; also drives the GPU
            volumes and the resource env vars.
        node_selector: passed to the pod spec unchanged.
        affinity: passed to the pod spec unchanged.
        tolerations: passed to the pod spec unchanged.
        image_pull_policy: passed to the container unchanged.
        restart_policy: passed to the pod spec unchanged.
        use_service_account: the service account name from settings is
            only used when this is truthy and `settings.K8S_RBAC_ENABLED`.

    Returns:
        A `client.V1PodSpec`.
    """
    env = get_list(env_vars)
    mounts = get_list(volume_mounts)
    pod_volumes = get_list(volumes)

    gpu_mounts, gpu_vols = get_gpu_volumes_def(resources)
    mounts += gpu_mounts
    pod_volumes += gpu_vols

    container_ports = [client.V1ContainerPort(container_port=number)
                       for number in ports]
    env += get_resources_env_vars(resources=resources)

    container = client.V1Container(name=container_name,
                                   image=image,
                                   image_pull_policy=image_pull_policy,
                                   command=command,
                                   args=args,
                                   ports=container_ports,
                                   env=env,
                                   env_from=env_from,
                                   resources=get_resources(resources),
                                   volume_mounts=mounts)

    service_account_name = (
        settings.K8S_SERVICE_ACCOUNT_NAME
        if use_service_account and settings.K8S_RBAC_ENABLED
        else None
    )
    return client.V1PodSpec(restart_policy=restart_policy,
                            service_account_name=service_account_name,
                            containers=[container],
                            volumes=pod_volumes,
                            node_selector=node_selector,
                            affinity=affinity,
                            tolerations=tolerations)
def get_pod_container(self,
                      volume_mounts,
                      env_vars=None,
                      command=None,
                      args=None,
                      resources=None):
    """Build the main `V1Container` of a job pod.

    The container env is the caller's `env_vars` followed by the job env
    vars (log level plus outputs, logs, data and project-data paths), the
    JSON-encoded job labels, and, when `resources` is truthy, the
    resource env vars.

    Args:
        volume_mounts: passed to the container unchanged.
        env_vars: normalised with `get_list` before being extended.
        command: container command.
        args: container args.
        resources: converted with `get_resources`.

    Returns:
        A `client.V1Container` named `self.job_container_name` running
        `self.job_docker_image`, exposing `self.ports`.
    """
    env = get_list(env_vars)

    job_name = self.job_name
    env += get_job_env_vars(
        log_level=self.log_level,
        outputs_path=get_job_outputs_path(job_name=job_name),
        logs_path=get_job_logs_path(job_name=job_name),
        data_path=get_job_data_path(job_name=job_name),
        project_data_path=get_project_data_path(project_name=self.project_name))

    job_info = get_env_var(name=constants.CONFIG_MAP_JOB_INFO_KEY_NAME,
                           value=json.dumps(self.labels))
    env += [job_info]

    if resources:
        env += get_resources_env_vars(resources=resources)

    container_ports = [client.V1ContainerPort(container_port=number)
                       for number in self.ports]
    return client.V1Container(name=self.job_container_name,
                              image=self.job_docker_image,
                              command=command,
                              args=args,
                              ports=container_ports,
                              env=env,
                              resources=get_resources(resources),
                              volume_mounts=volume_mounts)
def get_pod_container(self,
                      volume_mounts,
                      env_vars=None,
                      command=None,
                      args=None,
                      persistence_outputs=None,
                      persistence_data=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      secret_refs=None,
                      configmap_refs=None,
                      resources=None,
                      ephemeral_token=None):
    """Build the main `V1Container` of an experiment task pod.

    Requires `self.cluster_def` to be set (enforced with an assert).

    The container env is the caller's `env_vars` followed by the job env
    vars, the JSON-encoded cluster definition, the declarations, the
    JSON-encoded experiment labels, and the resource env vars. `env_from`
    is built from the secret and configmap refs.

    Args:
        volume_mounts: passed to the container unchanged.
        env_vars: normalised with `get_list` before being extended.
        command: container command.
        args: container args.
        persistence_outputs: used for the outputs path and the job env vars.
        persistence_data: forwarded to `get_job_env_vars`.
        outputs_refs_jobs: forwarded to `get_job_env_vars`.
        outputs_refs_experiments: forwarded to `get_job_env_vars`.
        secret_refs: forwarded to `get_pod_env_from`.
        configmap_refs: forwarded to `get_pod_env_from`.
        resources: converted with `get_resources`; also forwarded to
            `get_resources_env_vars`.
        ephemeral_token: forwarded to `get_job_env_vars`.

    Returns:
        A `client.V1Container`.
    """
    assert self.cluster_def is not None

    # Env vars preparations
    env = get_list(env_vars)
    outputs_path = get_experiment_outputs_path(
        persistence_outputs=persistence_outputs,
        experiment_name=self.experiment_name,
        original_name=self.original_name,
        cloning_strategy=self.cloning_strategy)
    env += get_job_env_vars(
        persistence_outputs=persistence_outputs,
        outputs_path=outputs_path,
        persistence_data=persistence_data,
        log_level=self.log_level,
        logs_path=get_experiment_logs_path(self.experiment_name),
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        ephemeral_token=ephemeral_token)

    cluster_var = get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                              value=json.dumps(self.cluster_def))
    declarations_var = get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                                   value=self.declarations)
    experiment_var = get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                                 value=json.dumps(self.experiment_labels))
    env += [cluster_var, declarations_var, experiment_var]
    env += get_resources_env_vars(resources=resources)

    # Env from configmap and secret refs
    env_from = get_pod_env_from(secret_refs=secret_refs,
                                configmap_refs=configmap_refs)

    container_ports = [client.V1ContainerPort(container_port=number)
                       for number in self.ports]
    return client.V1Container(name=self.job_container_name,
                              image=self.job_docker_image,
                              command=command,
                              args=args,
                              ports=container_ports,
                              env=env,
                              env_from=env_from,
                              resources=get_resources(resources),
                              volume_mounts=volume_mounts)
def get_task_pod_spec(self,
                      volume_mounts,
                      volumes,
                      env_vars=None,
                      command=None,
                      args=None,
                      resources=None,
                      node_selector=None,
                      restart_policy='OnFailure'):
    """Assemble the `V1PodSpec` for a task pod (master, worker, ps).

    Args:
        volume_mounts: normalised with `get_list`, then extended with the
            GPU mounts from `get_gpu_volumes_def(resources)`.
        volumes: normalised with `get_list`, then extended with the GPU
            volumes from `get_gpu_volumes_def(resources)`.
        env_vars: forwarded to the main container.
        command: forwarded to the main container.
        args: forwarded to the main container.
        resources: forwarded to the main container and used for GPU volumes.
        node_selector: used as given when truthy; otherwise the JSON string
            in `settings.NODE_SELECTORS_EXPERIMENTS` is decoded, or `None`
            is used when that setting is empty.
        restart_policy: pod restart policy, passed through unchanged.

    Returns:
        A `client.V1PodSpec` with the main container, the sidecar when
        `self.use_sidecar` is truthy, and the init container(s).
    """
    mounts = get_list(volume_mounts)
    pod_volumes = get_list(volumes)
    gpu_mounts, gpu_vols = get_gpu_volumes_def(resources)
    mounts += gpu_mounts
    pod_volumes += gpu_vols

    containers = [
        self.get_pod_container(volume_mounts=mounts,
                               env_vars=env_vars,
                               command=command,
                               args=args,
                               resources=resources)
    ]
    if self.use_sidecar:
        containers.append(self.get_sidecar_container())

    if not node_selector:
        # Only the settings default is decoded from JSON; an explicit
        # selector is handed to the pod spec untouched.
        default_selector = settings.NODE_SELECTORS_EXPERIMENTS
        node_selector = json.loads(default_selector) if default_selector else None

    service_account_name = (
        settings.K8S_SERVICE_ACCOUNT_NAME if settings.K8S_RBAC_ENABLED else None
    )
    init_containers = to_list(self.get_init_container())

    return client.V1PodSpec(restart_policy=restart_policy,
                            service_account_name=service_account_name,
                            init_containers=init_containers,
                            containers=containers,
                            volumes=pod_volumes,
                            node_selector=node_selector)
def get_task_pod_spec(self,
                      volume_mounts,
                      volumes,
                      env_vars=None,
                      command=None,
                      args=None,
                      resources=None,
                      node_selector=None,
                      restart_policy='OnFailure'):
    """Build the pod spec for a task pod (master, worker, ps).

    Creates the main container via `get_pod_container`, optionally adds
    the sidecar, and wraps everything in a `client.V1PodSpec`.

    Args:
        volume_mounts: normalised with `get_list`; GPU mounts are added.
        volumes: normalised with `get_list`; GPU volumes are added.
        env_vars: main container env vars.
        command: main container command.
        args: main container args.
        resources: forwarded to the main container and to
            `get_gpu_volumes_def`.
        node_selector: used as given when truthy; otherwise the JSON string
            in `settings.NODE_SELECTORS_EXPERIMENTS` is decoded (`None` if
            that setting is empty).
        restart_policy: pod restart policy, passed through unchanged.

    Returns:
        A `client.V1PodSpec`.
    """
    all_mounts = get_list(volume_mounts)
    all_volumes = get_list(volumes)
    extra_mounts, extra_volumes = get_gpu_volumes_def(resources)
    all_mounts += extra_mounts
    all_volumes += extra_volumes

    main_container = self.get_pod_container(volume_mounts=all_mounts,
                                            env_vars=env_vars,
                                            command=command,
                                            args=args,
                                            resources=resources)
    if self.use_sidecar:
        containers = [main_container, self.get_sidecar_container()]
    else:
        containers = [main_container]

    if not node_selector:
        # Fall back to the settings value, which is stored as JSON text.
        raw_selector = settings.NODE_SELECTORS_EXPERIMENTS
        node_selector = json.loads(raw_selector) if raw_selector else None

    spec = {
        'restart_policy': restart_policy,
        'service_account_name': (settings.K8S_SERVICE_ACCOUNT_NAME
                                 if settings.K8S_RBAC_ENABLED else None),
        'init_containers': to_list(self.get_init_container()),
        'containers': containers,
        'volumes': all_volumes,
        'node_selector': node_selector,
    }
    return client.V1PodSpec(**spec)
def get_project_pod_spec(volume_mounts,
                         volumes,
                         image,
                         command,
                         args,
                         ports,
                         env_vars=None,
                         container_name=None,
                         resources=None,
                         node_selector=None,
                         restart_policy=None,
                         use_service_account=False):
    """Build the single-container pod spec for project pods (tensorboard, notebooks).

    Args:
        volume_mounts: normalised with `get_list`; GPU mounts are added.
        volumes: normalised with `get_list`; GPU volumes are added.
        image: container image.
        command: container command.
        args: container args.
        ports: iterable of port numbers, each wrapped in a
            `client.V1ContainerPort`.
        env_vars: passed to the container unchanged.
        container_name: container name.
        resources: converted with `get_resources`; also drives the GPU
            volumes.
        node_selector: passed to the pod spec unchanged.
        restart_policy: passed to the pod spec unchanged.
        use_service_account: the service account name from settings is
            only used when this is truthy and `settings.K8S_RBAC_ENABLED`.

    Returns:
        A `client.V1PodSpec`.
    """
    mounts = get_list(volume_mounts)
    pod_volumes = get_list(volumes)

    gpu_mounts, gpu_vols = get_gpu_volumes_def(resources)
    mounts += gpu_mounts
    pod_volumes += gpu_vols

    container_ports = [client.V1ContainerPort(container_port=number)
                       for number in ports]
    container = client.V1Container(name=container_name,
                                   image=image,
                                   command=command,
                                   args=args,
                                   ports=container_ports,
                                   env=env_vars,
                                   resources=get_resources(resources),
                                   volume_mounts=mounts)

    service_account_name = (
        settings.K8S_SERVICE_ACCOUNT_NAME
        if use_service_account and settings.K8S_RBAC_ENABLED
        else None
    )
    return client.V1PodSpec(restart_policy=restart_policy,
                            service_account_name=service_account_name,
                            containers=[container],
                            volumes=pod_volumes,
                            node_selector=node_selector)
def get_pod_container(self,
                      volume_mounts,
                      env_vars=None,
                      command=None,
                      args=None,
                      resources=None):
    """Build the main `V1Container` of an experiment task pod.

    The container env is the caller's `env_vars` followed by one entry per
    experiment config-map key (via `get_from_experiment_config_map`) and
    two entries from the app secrets (via `get_from_app_secret`). When GPUs
    are requested, `LD_LIBRARY_PATH` is added from settings; if the setting
    is empty a warning is logged instead.

    Args:
        volume_mounts: passed to the container unchanged.
        env_vars: normalised with `get_list` before being extended.
        command: container command.
        args: container args.
        resources: converted with `get_resources`; its `gpu` attribute
            decides whether `LD_LIBRARY_PATH` is handled.

    Returns:
        A `client.V1Container` named `self.job_container_name` running
        `self.job_docker_image`, exposing `self.ports`.
    """
    env = get_list(env_vars)

    # Order matters only for the resulting env list; it mirrors the
    # original literal.
    config_map_keys = (
        constants.CONFIG_MAP_CLUSTER_KEY_NAME,
        constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
        constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
        constants.CONFIG_MAP_LOG_LEVEL_KEY_NAME,
        API_KEY_NAME,
        constants.CONFIG_MAP_EXPERIMENT_OUTPUTS_PATH_KEY_NAME,
        constants.CONFIG_MAP_EXPERIMENT_LOGS_PATH_KEY_NAME,
        constants.CONFIG_MAP_EXPERIMENT_DATA_PATH_KEY_NAME,
    )
    from_config_map = [self.get_from_experiment_config_map(key)
                       for key in config_map_keys]
    from_secrets = [
        get_from_app_secret('POLYAXON_SECRET_KEY', 'polyaxon-secret'),
        get_from_app_secret('POLYAXON_INTERNAL_SECRET_TOKEN',
                            'polyaxon-internal-secret-token'),
    ]
    env += from_config_map + from_secrets

    if resources and resources.gpu:
        if settings.LD_LIBRARY_PATH:
            env.append(client.V1EnvVar(name='LD_LIBRARY_PATH',
                                       value=settings.LD_LIBRARY_PATH))
        else:
            logger.warning('`LD_LIBRARY_PATH` was not properly set.')  # Publish error

    container_ports = [client.V1ContainerPort(container_port=number)
                       for number in self.ports]
    return client.V1Container(name=self.job_container_name,
                              image=self.job_docker_image,
                              command=command,
                              args=args,
                              ports=container_ports,
                              env=env,
                              resources=get_resources(resources),
                              volume_mounts=volume_mounts)
def get_pod_container(self,
                      volume_mounts,
                      persistence_outputs,
                      persistence_data,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      secret_refs=None,
                      configmap_refs=None,
                      env_vars=None,
                      command=None,
                      args=None,
                      resources=None):
    """Build the main `V1Container` of a job pod.

    The container env is the caller's `env_vars` followed by the job env
    vars, the JSON-encoded job labels and the resource env vars. `env_from`
    is built from the secret and configmap refs. When `self.ports` yields
    no ports, the container is created with `ports=None` rather than an
    empty list.

    Args:
        volume_mounts: passed to the container unchanged.
        persistence_outputs: used for the outputs path and the job env vars.
        persistence_data: forwarded to `get_job_env_vars`.
        outputs_refs_jobs: forwarded to `get_job_env_vars`.
        outputs_refs_experiments: forwarded to `get_job_env_vars`.
        secret_refs: forwarded to `get_pod_env_from`.
        configmap_refs: forwarded to `get_pod_env_from`.
        env_vars: normalised with `get_list` before being extended.
        command: container command.
        args: container args.
        resources: converted with `get_resources`; also forwarded to
            `get_resources_env_vars`.

    Returns:
        A `client.V1Container`.
    """
    # Env vars preparation
    env = get_list(env_vars)
    env += get_job_env_vars(
        log_level=self.log_level,
        persistence_outputs=persistence_outputs,
        outputs_path=get_job_outputs_path(persistence_outputs=persistence_outputs,
                                          job_name=self.job_name),
        persistence_data=persistence_data,
        logs_path=get_job_logs_path(job_name=self.job_name),
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments)

    job_info = get_env_var(name=constants.CONFIG_MAP_JOB_INFO_KEY_NAME,
                           value=json.dumps(self.labels))
    env += [job_info]
    env += get_resources_env_vars(resources=resources)

    # Env from configmap and secret refs
    env_from = get_pod_env_from(secret_refs=secret_refs,
                                configmap_refs=configmap_refs)

    container_ports = [client.V1ContainerPort(container_port=number)
                       for number in self.ports]
    if not container_ports:
        container_ports = None

    return client.V1Container(name=self.job_container_name,
                              image=self.job_docker_image,
                              command=command,
                              args=args,
                              ports=container_ports,
                              env=env,
                              env_from=env_from,
                              resources=get_resources(resources),
                              volume_mounts=volume_mounts)
def get_pod_container(self,
                      volume_mounts,
                      env_vars=None,
                      command=None,
                      args=None,
                      resources=None):
    """Build the main `V1Container` of an experiment task pod.

    Requires `self.cluster_def` to be set (enforced with an assert).

    The container env is the caller's `env_vars` followed by the job env
    vars (log level plus outputs, logs, data and project-data paths), the
    JSON-encoded cluster definition, the declarations, the JSON-encoded
    experiment labels, and, when `resources` is truthy, the resource
    env vars.

    Args:
        volume_mounts: passed to the container unchanged.
        env_vars: normalised with `get_list` before being extended.
        command: container command.
        args: container args.
        resources: converted with `get_resources`.

    Returns:
        A `client.V1Container` named `self.job_container_name` running
        `self.job_docker_image`, exposing `self.ports`.
    """
    assert self.cluster_def is not None

    env = get_list(env_vars)
    outputs_path = get_experiment_outputs_path(
        experiment_name=self.experiment_name,
        original_name=self.original_name,
        cloning_strategy=self.cloning_strategy)
    env += get_job_env_vars(
        log_level=self.log_level,
        outputs_path=outputs_path,
        logs_path=get_experiment_logs_path(self.experiment_name),
        data_path=get_experiment_data_path(self.experiment_name),
        project_data_path=get_project_data_path(project_name=self.project_name))

    cluster_var = get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                              value=json.dumps(self.cluster_def))
    declarations_var = get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                                   value=self.declarations)
    experiment_var = get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                                 value=json.dumps(self.experiment_labels))
    env += [cluster_var, declarations_var, experiment_var]

    if resources:
        env += get_resources_env_vars(resources=resources)

    container_ports = [client.V1ContainerPort(container_port=number)
                       for number in self.ports]
    return client.V1Container(name=self.job_container_name,
                              image=self.job_docker_image,
                              command=command,
                              args=args,
                              ports=container_ports,
                              env=env,
                              resources=get_resources(resources),
                              volume_mounts=volume_mounts)
def get_pod_container(self,
                      volume_mounts,
                      env_vars=None,
                      command=None,
                      args=None,
                      resources=None):
    """Create the task container for an experiment pod.

    `self.cluster_def` must already be set; this is asserted up front.

    Env vars are assembled in this order: the caller's `env_vars`, the job
    env vars from `get_job_env_vars`, the cluster / declarations /
    experiment-info vars, and finally the resource env vars when
    `resources` is truthy.

    Args:
        volume_mounts: passed to the container unchanged.
        env_vars: normalised with `get_list` before being extended.
        command: container command.
        args: container args.
        resources: converted with `get_resources`.

    Returns:
        A `client.V1Container`.
    """
    assert self.cluster_def is not None

    container_env = get_list(env_vars)

    experiment_name = self.experiment_name
    outputs_path = get_experiment_outputs_path(
        experiment_name=experiment_name,
        original_name=self.original_name,
        cloning_strategy=self.cloning_strategy)
    container_env += get_job_env_vars(
        log_level=self.log_level,
        outputs_path=outputs_path,
        logs_path=get_experiment_logs_path(experiment_name),
        data_path=get_experiment_data_path(experiment_name),
        project_data_path=get_project_data_path(project_name=self.project_name))

    container_env += [
        get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                    value=json.dumps(self.cluster_def)),
        get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                    value=self.declarations),
        get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                    value=json.dumps(self.experiment_labels)),
    ]

    if resources:
        container_env += get_resources_env_vars(resources=resources)

    container_kwargs = {
        'name': self.job_container_name,
        'image': self.job_docker_image,
        'command': command,
        'args': args,
        'ports': [client.V1ContainerPort(container_port=port)
                  for port in self.ports],
        'env': container_env,
        'resources': get_resources(resources),
        'volume_mounts': volume_mounts,
    }
    return client.V1Container(**container_kwargs)
def get_task_pod_spec(self,
                      task_type,
                      task_idx,
                      volume_mounts,
                      volumes,
                      env_vars=None,
                      command=None,
                      args=None,
                      sidecar_args=None,
                      persistence_outputs=None,
                      persistence_data=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      resources=None,
                      node_selector=None,
                      affinity=None,
                      tolerations=None,
                      restart_policy='OnFailure'):
    """Assemble the `V1PodSpec` for one experiment task pod (master, worker, ps).

    The task's type and index are exposed to the main container as a JSON
    env var named `constants.CONFIG_MAP_TASK_INFO_KEY_NAME`. Node selector,
    affinity and tolerations are resolved against the `*_EXPERIMENTS`
    defaults in settings. A service account name is only set when
    `settings.K8S_RBAC_ENABLED`.

    Args:
        task_type: task type, written under the 'type' key of the task info.
        task_idx: task index, written under the 'index' key of the task info.
        volume_mounts: normalised with `get_list`, then extended with the
            GPU mounts from `get_gpu_volumes_def(resources)`.
        volumes: normalised with `get_list`, then extended with the GPU
            volumes from `get_gpu_volumes_def(resources)`.
        env_vars: normalised with `get_list`; the task info var is appended.
        command: forwarded to the main container.
        args: forwarded to the main container.
        sidecar_args: forwarded to the sidecar container, if one is used.
        persistence_outputs: forwarded to the main and init containers.
        persistence_data: forwarded to the main container.
        outputs_refs_jobs: forwarded to the main container.
        outputs_refs_experiments: forwarded to the main container.
        resources: forwarded to the main container and used for GPU volumes.
        node_selector: overrides `settings.NODE_SELECTOR_EXPERIMENTS`.
        affinity: overrides `settings.AFFINITY_EXPERIMENTS`.
        tolerations: overrides `settings.TOLERATIONS_EXPERIMENTS`.
        restart_policy: pod restart policy, passed through unchanged.

    Returns:
        A `client.V1PodSpec`.
    """
    mounts = get_list(volume_mounts)
    pod_volumes = get_list(volumes)
    gpu_mounts, gpu_vols = get_gpu_volumes_def(resources)
    mounts += gpu_mounts
    pod_volumes += gpu_vols

    # Tell the container which task of the cluster it is.
    env_vars = get_list(env_vars)
    task_info_var = client.V1EnvVar(
        name=constants.CONFIG_MAP_TASK_INFO_KEY_NAME,
        value=json.dumps({'type': task_type, 'index': task_idx}))
    env_vars.append(task_info_var)

    containers = [
        self.get_pod_container(volume_mounts=mounts,
                               env_vars=env_vars,
                               command=command,
                               args=args,
                               persistence_outputs=persistence_outputs,
                               persistence_data=persistence_data,
                               outputs_refs_jobs=outputs_refs_jobs,
                               outputs_refs_experiments=outputs_refs_experiments,
                               resources=resources)
    ]
    if self.use_sidecar:
        containers.append(
            self.get_sidecar_container(task_type=task_type,
                                       task_idx=task_idx,
                                       args=sidecar_args))

    # Explicit values win; the settings supply the fallbacks.
    scheduling = {
        'node_selector': get_node_selector(
            node_selector=node_selector,
            default_node_selector=settings.NODE_SELECTOR_EXPERIMENTS),
        'affinity': get_affinity(
            affinity=affinity,
            default_affinity=settings.AFFINITY_EXPERIMENTS),
        'tolerations': get_tolerations(
            tolerations=tolerations,
            default_tolerations=settings.TOLERATIONS_EXPERIMENTS),
    }

    service_account_name = (
        settings.K8S_SERVICE_ACCOUNT_NAME if settings.K8S_RBAC_ENABLED else None
    )
    init_containers = to_list(self.get_init_container(persistence_outputs))

    return client.V1PodSpec(restart_policy=restart_policy,
                            service_account_name=service_account_name,
                            init_containers=init_containers,
                            containers=containers,
                            volumes=pod_volumes,
                            **scheduling)