Exemple #1
0
    def __init__(self,
                 build_job,
                 repo_path,
                 from_image,
                 copy_code=True,
                 build_steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        """Collect the state needed to build a docker image for `build_job`.

        Args:
            build_job: job object; its `uuid.hex` and `unique_name` are stored
                and it is handed to `get_image_name` to derive the image name.
            repo_path: '/'-separated path of the code repository. Its last
                segment becomes `folder_name`, everything before it becomes
                `build_path`.
            from_image: base image, stored as-is.
            copy_code: flag stored as-is on the instance.
            build_steps: build steps, normalised with `to_list` (None allowed).
            env_vars: environment variables, normalised with `to_list`
                (None allowed).
            dockerfile_name: file name joined onto `build_path` to form
                `dockerfile_path`.
        """
        # Job identity.
        self.build_job = build_job
        self.job_uuid = build_job.uuid.hex
        self.job_name = build_job.unique_name

        # Image coordinates: the tag is the job uuid.
        self.from_image = from_image
        self.image_name = get_image_name(self.build_job)
        self.image_tag = self.job_uuid

        # Split the repo path once: last segment is the folder, the rest is
        # the directory the build runs from.
        path_segments = repo_path.split('/')
        self.folder_name = path_segments[-1]
        self.repo_path = repo_path
        self.copy_code = copy_code
        self.build_path = '/'.join(path_segments[:-1])

        self.build_steps = to_list(build_steps, check_none=True)
        self.env_vars = to_list(env_vars, check_none=True)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)

        # NOTE(review): these helpers presumably read the path attributes set
        # above, so they are kept after them -- confirm before reordering.
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()

        # Docker client and registry state; registry/url start unset.
        self.docker = APIClient(version='auto')
        self.registry_host = None
        self.docker_url = None
        self.is_pushing = False
Exemple #2
0
    def __init__(self,
                 repo_path,
                 from_image,
                 image_name,
                 image_tag,
                 copy_code=True,
                 in_tmp_repo=True,
                 build_steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        """Collect the state needed to build `image_name:image_tag`.

        Args:
            repo_path: '/'-separated path of the code repository; its last
                segment becomes `folder_name`.
            from_image: base image, stored as-is.
            image_name: name of the image to build, stored as-is.
            image_tag: tag of the image to build, stored as-is.
            copy_code: flag stored as-is; together with `in_tmp_repo` it
                decides whether `create_tmp_repo()` is called.
            in_tmp_repo: flag stored as-is (see `copy_code`).
            build_steps: build steps, normalised with `to_list` (None allowed).
            env_vars: environment variables, normalised with `to_list`
                (None allowed).
            dockerfile_name: file name joined onto `build_path` to form
                `dockerfile_path`.
        """
        # A random id per instance, so that concurrent jobs can each get a
        # unique tmp folder for the dockerizer.
        self.uuid = uuid.uuid4().hex

        self.from_image = from_image
        self.image_name = image_name
        self.image_tag = image_tag
        self.repo_path = repo_path
        self.folder_name = repo_path.rpartition('/')[2]
        self.copy_code = copy_code
        self.in_tmp_repo = in_tmp_repo

        # Build from a temporary repo only when both flags are set;
        # otherwise build straight from the given repo path.
        use_tmp_repo = in_tmp_repo and copy_code
        self.build_repo_path = self.create_tmp_repo() if use_tmp_repo else self.repo_path

        # Parent directory of the repo that is actually built.
        self.build_path = self.build_repo_path.rpartition('/')[0]
        self.build_steps = to_list(build_steps, check_none=True)
        self.env_vars = to_list(env_vars, check_none=True)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)

        # NOTE(review): these helpers presumably read the path attributes set
        # above, so they are kept after them -- confirm before reordering.
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()

        self.docker = APIClient(version='auto')
        self.registry_host = None
        self.docker_url = None
Exemple #3
0
 def get_init_container(self, init_command, init_args, env_vars,
                        context_mounts, persistence_outputs,
                        persistence_data):
     """Build the pod init container that prepares the job outputs path.

     Args:
         init_command: container command; defaults to `["/bin/sh", "-c"]`
             when falsy.
         init_args: container args; when falsy they are generated with
             `get_output_args` using the CREATE init command.
         env_vars: environment variables for the container (None allowed).
         context_mounts: extra volume mounts added after the outputs mount
             (None allowed).
         persistence_outputs: outputs persistence used to resolve the
             outputs path and the outputs volume mount.
         persistence_data: not used by this method.

     Returns:
         A `client.V1Container`.
     """
     container_env = to_list(env_vars, check_none=True)

     job_outputs_path = stores.get_job_outputs_path(
         persistence=persistence_outputs, job_name=self.job_name)
     outputs_mount = get_pod_outputs_volume(
         persistence_outputs=persistence_outputs)[1]
     mounts = outputs_mount + to_list(context_mounts, check_none=True)

     command = init_command or ["/bin/sh", "-c"]

     # Fall back to the "create outputs path" args only when none were given.
     container_args = init_args or to_list(
         get_output_args(command=InitCommands.CREATE,
                         outputs_path=job_outputs_path))
     # The auth context args are always added at the end.
     container_args += to_list(
         get_auth_context_args(entity='job', entity_name=self.job_name))

     # All args are concatenated into a single string argument.
     joined_args = ''.join(container_args)
     return client.V1Container(
         name=self.init_container_name,
         image=self.init_docker_image,
         image_pull_policy=self.init_docker_image_pull_policy,
         command=command,
         args=[joined_args],
         env=container_env,
         volume_mounts=mounts)
Exemple #4
0
    def __init__(self,
                 repo_path,
                 from_image,
                 copy_code=True,
                 build_steps=None,
                 env_vars=None,
                 nvidia_bin=None,
                 dockerfile_name=POLYAXON_DOCKERFILE_NAME,
                 lang_env=None,
                 uid=None,
                 gid=None):
        """Collect the state needed to generate a dockerfile for a repo.

        Args:
            repo_path: '/'-separated path of the code repository. Its last
                segment becomes `folder_name`, everything before it becomes
                `build_path`.
            from_image: base image, stored as-is.
            copy_code: flag stored as-is on the instance.
            build_steps: build steps, normalised with `to_list` (None allowed).
            env_vars: environment variables, normalised with `to_list`
                (None allowed).
            nvidia_bin: stored as-is.
            dockerfile_name: file name joined onto `build_path` to form
                `dockerfile_path`.
            lang_env: stored as-is.
            uid: stored as-is.
            gid: stored as-is.
        """
        self.from_image = from_image

        # Parent directory and final segment of the repo path.
        parent_dir, _, last_segment = repo_path.rpartition('/')
        self.folder_name = last_segment
        self.repo_path = repo_path
        self.copy_code = copy_code
        self.build_path = parent_dir

        self.build_steps = to_list(build_steps, check_none=True)
        self.env_vars = to_list(env_vars, check_none=True)
        self.nvidia_bin = nvidia_bin
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)

        # NOTE(review): these helpers presumably read the path attributes set
        # above, so they are kept after them -- confirm before reordering.
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_conda_env_path = self._get_conda_env_path()
        self.polyaxon_setup_path = self._get_setup_path()

        self.lang_env = lang_env
        self.uid = uid
        self.gid = gid
        self.is_pushing = False
Exemple #5
0
    def get_task_pod_spec(self,
                          volume_mounts,
                          volumes,
                          resource_name,
                          persistence_outputs=None,
                          persistence_data=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None,
                          ports=None,
                          secret_refs=None,
                          configmap_refs=None,
                          ephemeral_token=None,
                          node_selector=None,
                          affinity=None,
                          tolerations=None,
                          restart_policy='OnFailure'):
        """Build the pod spec used to create task pods (master, worker, ps).

        The spec holds the main container from `get_pod_container`, an
        optional sidecar container (when `self.use_sidecar` is set) and the
        init container(s) from `get_init_container`. GPU volumes and mounts
        derived from `resources` are added to the given volumes and mounts.

        Returns:
            A `client.V1PodSpec`.
        """
        mounts = to_list(volume_mounts, check_none=True)
        pod_volumes = to_list(volumes, check_none=True)

        # Extend the mounts/volumes in place with the GPU definitions.
        gpu_mounts, gpu_vols = get_gpu_volumes_def(resources)
        mounts += gpu_mounts
        pod_volumes += gpu_vols

        main_container = self.get_pod_container(
            volume_mounts=mounts,
            persistence_outputs=persistence_outputs,
            persistence_data=persistence_data,
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments,
            secret_refs=secret_refs,
            configmap_refs=configmap_refs,
            resources=resources,
            env_vars=env_vars,
            command=command,
            args=args,
            ports=ports,
            ephemeral_token=ephemeral_token)

        pod_containers = [main_container]
        if self.use_sidecar:
            pod_containers.append(
                self.get_sidecar_container(resource_name=resource_name))

        # Resolve scheduling options through the instance helpers.
        resolved_node_selector = self._get_node_selector(node_selector=node_selector)
        resolved_affinity = self._get_affinity(affinity=affinity)
        resolved_tolerations = self._get_tolerations(tolerations=tolerations)
        account_name = self._get_service_account_name()
        init_containers = to_list(self.get_init_container(persistence_outputs),
                                  check_none=True)

        return client.V1PodSpec(
            restart_policy=restart_policy,
            service_account_name=account_name,
            init_containers=init_containers,
            containers=pod_containers,
            volumes=pod_volumes,
            node_selector=resolved_node_selector,
            tolerations=resolved_tolerations,
            affinity=resolved_affinity)
Exemple #6
0
 def get_init_container(self, init_command, init_args, env_vars,
                        context_mounts, persistence_outputs,
                        persistence_data):
     """Build the pod init container(s) that prepare the experiment outputs path.

     Args:
         init_command: container command; defaults to `["/bin/sh", "-c"]`
             when falsy.
         init_args: leading container args; an empty list is used when falsy.
         env_vars: environment variables for the container (None allowed).
         context_mounts: extra volume mounts added after the outputs mount
             (None allowed).
         persistence_outputs: outputs persistence used for the init path
             args and the outputs volume mount.
         persistence_data: not used by this method.

     Returns:
         A one-element list holding a `client.V1Container`.
     """
     container_env = to_list(env_vars, check_none=True)

     outputs_mount = get_pod_outputs_volume(
         persistence_outputs=persistence_outputs)[1]
     mounts = outputs_mount + to_list(context_mounts, check_none=True)

     command = init_command or ["/bin/sh", "-c"]

     # Caller args first, then the init path args, then the auth context args.
     container_args = init_args or []
     container_args += to_list(
         self.get_init_path_args(persistence_outputs=persistence_outputs),
         check_none=True)
     container_args += to_list(
         get_auth_context_args(entity='experiment',
                               entity_name=self.experiment_name))

     # All args are concatenated into a single string argument.
     joined_args = ''.join(container_args)
     init_container = client.V1Container(
         name=self.init_container_name,
         image=self.init_docker_image,
         image_pull_policy=self.init_docker_image_pull_policy,
         command=command,
         args=[joined_args],
         env=container_env,
         resources=get_init_resources(),
         volume_mounts=mounts)
     return [init_container]
Exemple #7
0
    def get_is_default(self, obj):
        """Return whether `obj.id` is one of the default ids.

        Defaults passed explicitly through `self.context['defaults']` take
        precedence. When the context carries no defaults, the ids configured
        under `self.default_option` are used, but only if the context holds a
        request whose user passes `is_user`.

        Args:
            obj: object exposing an `id` attribute.

        Returns:
            bool: True when `obj.id` is among the applicable defaults.
        """
        # Test the raw context value for None: `to_list(..., check_none=True)`
        # converts None into a list, which made the fallback below
        # unreachable when the check was done on the converted value.
        defaults = self.context.get('defaults', None)
        if defaults is not None:
            return obj.id in to_list(defaults)

        # No explicit defaults: fall back to the configured option for the
        # requesting user, if one is set in the context.
        request = self.context.get('request', None)
        if request and is_user(request.user):
            return obj.id in to_list(conf.get(self.default_option))
        return False
Exemple #8
0
def get_project_pod_spec(volume_mounts,
                         volumes,
                         image,
                         command,
                         args,
                         ports,
                         env_vars=None,
                         env_from=None,
                         container_name=None,
                         resources=None,
                         node_selector=None,
                         affinity=None,
                         tolerations=None,
                         image_pull_policy=None,
                         restart_policy=None,
                         service_account_name=None):
    """Build the pod spec used for project pods (tensorboard, notebooks).

    The spec holds a single container. GPU volumes/mounts and resource
    related environment variables derived from `resources` are added to the
    given volumes, mounts and env vars.

    Returns:
        A `client.V1PodSpec`.
    """
    container_env = to_list(env_vars, check_none=True)
    mounts = to_list(volume_mounts, check_none=True)
    pod_volumes = to_list(volumes, check_none=True)

    # Extend the mounts/volumes in place with the GPU definitions.
    gpu_mounts, gpu_vols = get_gpu_volumes_def(resources)
    mounts += gpu_mounts
    pod_volumes += gpu_vols

    container_ports = [
        client.V1ContainerPort(container_port=port) for port in ports
    ]
    container_env += get_resources_env_vars(resources=resources)

    main_container = client.V1Container(
        name=container_name,
        image=image,
        image_pull_policy=image_pull_policy,
        command=command,
        args=args,
        ports=container_ports,
        env=container_env,
        env_from=env_from,
        resources=get_resources(resources),
        volume_mounts=mounts)

    # A service account is only kept when K8S_RBAC_ENABLED is set.
    if service_account_name and not conf.get('K8S_RBAC_ENABLED'):
        service_account_name = None

    return client.V1PodSpec(
        restart_policy=restart_policy,
        service_account_name=service_account_name,
        containers=[main_container],
        volumes=pod_volumes,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations)
Exemple #9
0
 def render(self):
     """Render `POLYAXON_DOCKER_TEMPLATE` with the values of `self.build_context`.

     The `copy`, `run`, `env` and `path` values are normalised with
     `to_list` (None allowed); all other values are passed through as-is.

     Returns:
         The rendered template.
     """
     template = jinja2.Template(POLYAXON_DOCKER_TEMPLATE)
     context = self.build_context
     template_values = dict(
         image=context.image,
         copy=to_list(context.copy, check_none=True),
         run=to_list(context.run, check_none=True),
         env=to_list(context.env, check_none=True),
         workdir=context.workdir,
         path=to_list(context.path, check_none=True),
         workdir_path=context.workdir_path,
         lang_env=context.lang_env,
         uid=context.uid,
         gid=context.gid,
         shell=context.shell,
     )
     return template.render(**template_values)
    def __init__(self, filepaths, params=None, debug_ttl=False):
        """Read one or more polyaxonfiles and build the matching specification.

        Args:
            filepaths: a file path or a collection of file paths; each one
                must point to an existing file.
            params: optional mapping, added as an extra `{'params': ...}`
                config source.
            debug_ttl: optional integer; when set, an extra config source
                with a `sleep <debug_ttl>` run command is added. Only allowed
                for job and experiment specifications.

        Raises:
            PolyaxonfileError: if a path is not a file, `params` is not a
                mapping, `debug_ttl` is not an integer, debug mode is
                requested for an unsupported kind, or the specification
                itself is not valid.
        """
        # Work on a private copy: extra config sources are appended below and
        # must not end up in a list owned by the caller (this also accepts
        # non-list sequences).
        filepaths = list(to_list(filepaths))
        for filepath in filepaths:
            if not os.path.isfile(filepath):
                raise PolyaxonfileError(
                    "`{}` must be a valid file".format(filepath))
        # File names are captured before the extra (non-file) sources are added.
        self._filenames = [
            os.path.basename(filepath) for filepath in filepaths
        ]
        if params:
            if not isinstance(params, Mapping):
                raise PolyaxonfileError(
                    "Params: `{}` must be a valid mapping".format(params))
            filepaths.append({'params': params})
        if debug_ttl:
            if not isinstance(debug_ttl, int):
                raise PolyaxonfileError(
                    "Debug TTL `{}` must be a valid integer".format(debug_ttl))
            filepaths.append({'run': {'cmd': 'sleep {}'.format(debug_ttl)}})
        data = rhea.read(filepaths)
        kind = BaseSpecification.get_kind(data=data)

        # Debug mode only makes sense for kinds that run a command.
        debug_cond = (debug_ttl
                      and not (BaseSpecification.check_kind_experiment(kind)
                               or BaseSpecification.check_kind_job(kind)))
        if debug_cond:
            raise PolyaxonfileError(
                'You can only trigger debug mode on a job or an experiment specification, '
                'received instead a `{}` specification'.format(kind))
        try:
            self.specification = SPECIFICATION_BY_KIND[kind](data)
        except PolyaxonConfigurationError as e:
            raise PolyaxonfileError(e)
Exemple #11
0
 def handle_logs(message):
     """Dispatch a log message to the log-lines or the status handler.

     Plain log lines (non-empty lines, no status) go to `handle_log_lines`;
     everything else goes to `handle_status` together with its status.

     Args:
         message: mapping with a required 'log_lines' entry and an
             optional 'status' entry.
     """
     lines = to_list(message['log_lines'])
     status = message.get('status')
     if status or not lines:
         handle_status(status, lines)
         return
     handle_log_lines(lines)
Exemple #12
0
def check_polyaxonfile(file,  # pylint:disable=redefined-builtin
                       params=None,
                       debug_ttl=None,
                       log=True):
    """Validate the given polyaxonfile(s) and return the parsed `PolyaxonFile`.

    Prints an error and exits the process with status 1 when none of the
    given paths is an existing file, or when the file cannot be parsed.

    Args:
        file: a path or a list of paths.
        params: optional raw params, parsed with `parse_params` when truthy.
        debug_ttl: forwarded to `PolyaxonFile`.
        log: when truthy, print a success message for a valid file.

    Returns:
        The `PolyaxonFile` instance.
    """
    file = to_list(file)
    found = [os.path.isfile(path) for path in file]

    parsed_params = parse_params(params) if params else None

    # At least one of the paths must exist.
    if not any(found):
        Printer.print_error('Polyaxonfile is not present, '
                            'please run {}'.format(constants.INIT_COMMAND))
        sys.exit(1)

    try:
        plx_file = PolyaxonFile(file, params=parsed_params, debug_ttl=debug_ttl)
        if log:
            Printer.print_success("Polyaxonfile valid")
    except Exception as e:
        # Top-level CLI boundary: report any failure and stop.
        Printer.print_error("Polyaxonfile is not valid.")
        Printer.print_error('Error message `{}`.'.format(e))
        sys.exit(1)
    else:
        return plx_file
Exemple #13
0
    def get_init_container(self, persistence_outputs):
        """Build the pod init container(s) that set up the outputs path.

        * A clone with the RESUME strategy needs no init container.
        * A clone with the COPY strategy uses the COPY init command, with
          the outputs path of the original experiment as the source.
        * Anything else uses the CREATE init command.

        Args:
            persistence_outputs: outputs persistence used to resolve the
                outputs paths and the outputs volume mount.

        Returns:
            A list with zero or one `client.V1Container`.
        """
        is_clone = self.original_name is not None
        if is_clone and self.cloning_strategy == CloningStrategy.RESUME:
            return []

        # Default: create a fresh outputs path.
        command = InitCommands.CREATE
        original_outputs_path = None
        if is_clone and self.cloning_strategy == CloningStrategy.COPY:
            command = InitCommands.COPY
            original_outputs_path = stores.get_experiment_outputs_path(
                persistence=persistence_outputs,
                experiment_name=self.original_name)

        outputs_path = stores.get_experiment_outputs_path(
            persistence=persistence_outputs,
            experiment_name=self.experiment_name)
        outputs_mount = get_pod_outputs_volume(
            persistence_outputs=persistence_outputs)[1]

        init_args = to_list(
            get_output_args(
                command=command,
                outputs_path=outputs_path,
                original_outputs_path=original_outputs_path))
        init_container = client.V1Container(
            name=self.init_container_name,
            image=self.init_docker_image,
            command=["/bin/sh", "-c"],
            args=init_args,
            volume_mounts=outputs_mount)
        return [init_container]
Exemple #14
0
 def handle_logs(message):
     """Dispatch a log message to the log-lines or the status handler.

     Plain log lines (non-empty lines, no status) go to `handle_log_lines`;
     everything else goes to `handle_status` together with its status.

     Args:
         message: mapping with a required "log_lines" entry and an
             optional "status" entry.
     """
     lines = to_list(message["log_lines"])
     status = message.get("status")
     if status or not lines:
         handle_status(status, lines)
         return
     handle_log_lines(lines)
Exemple #15
0
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          persistence_outputs=None,
                          persistence_data=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          secret_refs=None,
                          configmap_refs=None,
                          resources=None,
                          ephemeral_token=None):
        """Build the main job container of a task pod.

        The container environment is made of, in order: the given `env_vars`,
        the job env vars from `get_job_env_vars`, the cluster definition,
        declarations and experiment info entries, and the resource related
        env vars. Requires `self.cluster_def` to be set.

        Returns:
            A `client.V1Container`.
        """
        assert self.cluster_def is not None

        # --- environment variables ---
        container_env = to_list(env_vars, check_none=True)
        experiment_outputs_path = get_experiment_outputs_path(
            persistence_outputs=persistence_outputs,
            experiment_name=self.experiment_name,
            original_name=self.original_name,
            cloning_strategy=self.cloning_strategy)
        experiment_logs_path = get_experiment_logs_path(self.experiment_name,
                                                        temp=False)
        container_env += get_job_env_vars(
            persistence_outputs=persistence_outputs,
            outputs_path=experiment_outputs_path,
            persistence_data=persistence_data,
            log_level=self.log_level,
            logs_path=experiment_logs_path,
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments,
            ephemeral_token=ephemeral_token,
        )

        # Cluster definition and experiment labels are serialised to JSON;
        # declarations are passed as they are.
        cluster_env = get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                                  value=json.dumps(self.cluster_def))
        declarations_env = get_env_var(
            name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
            value=self.declarations)
        experiment_info_env = get_env_var(
            name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
            value=json.dumps(self.experiment_labels))
        container_env += [cluster_env, declarations_env, experiment_info_env]
        container_env += get_resources_env_vars(resources=resources)

        # --- env from configmap and secret refs ---
        env_from = get_pod_env_from(secret_refs=secret_refs,
                                    configmap_refs=configmap_refs)

        container_ports = [
            client.V1ContainerPort(container_port=port) for port in self.ports
        ]
        return client.V1Container(
            name=self.job_container_name,
            image=self.job_docker_image,
            command=command,
            args=args,
            ports=container_ports,
            env=container_env,
            env_from=env_from,
            resources=get_resources(resources),
            volume_mounts=volume_mounts)
Exemple #16
0
def check_polyaxonfile(polyaxonfile,
                       params=None,
                       profile=None,
                       queue=None,
                       nocache=None,
                       log=True):
    """Validate the polyaxonfile(s) and return the resulting op specification.

    When no polyaxonfile is given, `PolyaxonFile.check_default_path` is asked
    for one in the current directory. Prints an error and exits the process
    with status 1 when none of the paths is an existing file, or when the
    file cannot be turned into an op specification.

    Args:
        polyaxonfile: a path or a list of paths (may be falsy).
        params: optional raw params, parsed with `parse_params` when truthy.
        profile: forwarded to `get_op_specification`.
        queue: forwarded to `get_op_specification`.
        nocache: forwarded to `get_op_specification`.
        log: when truthy, print a success message for a valid file.

    Returns:
        The value returned by `PolyaxonFile.get_op_specification`.
    """
    # Given path -> default path lookup -> empty string as a last resort.
    polyaxonfile = (polyaxonfile
                    or PolyaxonFile.check_default_path(path=".")
                    or "")

    polyaxonfile = to_list(polyaxonfile)
    found = [os.path.isfile(path) for path in polyaxonfile]

    parsed_params = parse_params(params) if params else None

    # At least one of the paths must exist.
    if not any(found):
        Printer.print_error("Polyaxonfile is not present, "
                            "please run {}".format(constants.INIT_COMMAND))
        sys.exit(1)

    try:
        op_specification = PolyaxonFile(polyaxonfile).get_op_specification(
            params=parsed_params,
            profile=profile,
            queue=queue,
            nocache=nocache)
        if log:
            Printer.print_success("Polyaxonfile valid")
    except Exception as e:
        # Top-level CLI boundary: report any failure and stop.
        handle_cli_error(e, message="Polyaxonfile is not valid.")
        sys.exit(1)
    else:
        return op_specification
Exemple #17
0
def _set_persistence(instance,
                     default_persistence_data=None,
                     default_persistence_outputs=None):
    """Fill `instance.persistence` from its specification or the defaults.

    Does nothing when `instance.persistence` is already set. Otherwise the
    data/artifact refs of the specification environment are used, falling
    back to the given defaults when the specification provides none. The
    validated values are stored as a `PersistenceConfig` dict.

    Args:
        instance: object with `persistence` and `specification` attributes.
        default_persistence_data: fallback for missing data refs.
        default_persistence_outputs: fallback for missing artifact refs.
    """
    if instance.persistence:
        return

    specification = instance.specification
    environment = specification.environment if specification else None

    data_refs = None
    if environment and environment.data_refs:
        data_refs = environment.data_refs

    artifact_refs = None
    if environment and environment.artifact_refs:
        # TODO: this is a temp workaround until the finalized Polyflow version
        # (only the first artifact ref is used).
        artifact_refs = to_list(environment.artifact_refs)[0]

    # Fall back to the defaults for whatever the specification did not set.
    if default_persistence_data and not data_refs:
        data_refs = default_persistence_data
    if default_persistence_outputs and not artifact_refs:
        artifact_refs = default_persistence_outputs

    persistence_config = PersistenceConfig(
        data=validate_persistence_data(persistence_data=data_refs),
        outputs=validate_persistence_outputs(persistence_outputs=artifact_refs))
    instance.persistence = persistence_config.to_dict()
Exemple #18
0
def get_sidecar_container(job_container_name,
                          sidecar_container_name,
                          sidecar_docker_image,
                          sidecar_docker_image_pull_policy,
                          namespace,
                          sidecar_config,
                          sidecar_args,
                          internal_health_check_url,
                          internal_reconcile_url,
                          volume_mounts,
                          env_vars=None):
    """Build the sidecar container of a pod.

    The container environment is made of, in order: the given `env_vars`,
    the env vars from `get_sidecar_env_vars`, and one env var per entry of
    `sidecar_config`.

    Returns:
        A `client.V1Container`.
    """
    sidecar_env = to_list(env_vars) if env_vars else []
    sidecar_env += get_sidecar_env_vars(
        namespace=namespace,
        job_container_name=job_container_name,
        internal_health_check_url=internal_health_check_url,
        internal_reconcile_url=internal_reconcile_url)
    # Expose every sidecar config entry as an env var of the same name.
    for config_key, config_value in sidecar_config.items():
        sidecar_env.append(get_env_var(name=config_key, value=config_value))

    container_spec = dict(
        name=sidecar_container_name,
        image=sidecar_docker_image,
        image_pull_policy=sidecar_docker_image_pull_policy,
        command=get_sidecar_command(),
        env=sidecar_env,
        volume_mounts=volume_mounts,
        resources=get_sidecar_resources(),
        args=sidecar_args)
    return client.V1Container(**container_spec)
Exemple #19
0
 def gpu_resources(cls, jobs_resources):
     """Print a table with the GPU usage of the given jobs.

     The screen is cleared first, then one table row is printed per GPU of
     every job. If none of the jobs reports GPU resources, an error is
     printed and the process exits with status 1.

     Args:
         jobs_resources: one resources dict, or a list of them, each
             loadable with `ContainerResourcesConfig.from_dict`.
     """
     jobs_resources = to_list(jobs_resources)
     click.clear()
     data = [[
         'job_name', 'name', 'GPU Usage', 'GPU Mem Usage / Total',
         'GPU Temperature', 'Power Draw / Limit'
     ]]
     non_gpu_jobs = 0
     for job_resources in jobs_resources:
         job_resources = ContainerResourcesConfig.from_dict(job_resources)
         if not job_resources.gpu_resources:
             non_gpu_jobs += 1
             continue
         # One row per GPU: merging all GPUs of a job into a single row
         # produces a row that does not line up with the six headers.
         for gpu_resources in job_resources.gpu_resources:
             data.append([
                 job_resources.job_name,
                 gpu_resources.name,
                 to_percentage(gpu_resources.utilization_gpu / 100),
                 '{} / {}'.format(
                     to_unit_memory(gpu_resources.memory_used),
                     to_unit_memory(gpu_resources.memory_total)),
                 gpu_resources.temperature_gpu,
                 '{} / {}'.format(gpu_resources.power_draw,
                                  gpu_resources.power_limit),
             ])
     if non_gpu_jobs == len(jobs_resources):
         Printer.print_error(
             'No GPU job was found, please run `resources` command without `-g | --gpu` option.'
         )
         # `sys.exit` rather than the `exit` helper, which is installed by
         # the `site` module and not guaranteed to exist.
         sys.exit(1)
     click.echo(tabulate(data, headers="firstrow"))
     sys.stdout.flush()
Exemple #20
0
    def _get_valid_config(cls, config, *fields) -> ConfigType:
        """Validate web hook configs and return them in a normalised form.

        Entries without a truthy 'url' are logged and skipped. Each kept
        entry is reduced to its 'url', its upper-cased 'method' (default
        'POST') and whichever of `fields` it defines.

        Args:
            config: one web hook dict or a list of them.
            *fields: extra keys to copy over from each web hook when present.

        Returns:
            The list of normalised web hook dicts.

        Raises:
            PolyaxonActionException: for an invalid URL, a non-string method,
                or a method other than GET/POST.
        """
        allowed_methods = ('GET', 'POST')
        valid_hooks = []
        for web_hook in to_list(config):
            url = web_hook.get('url')
            if not url:
                logger.warning("Settings contains a non compatible web hook: `%s`", web_hook)
                continue

            url = web_hook['url']
            if not validate_url(url):
                raise PolyaxonActionException('{} received invalid URL `{}`.'.format(cls.name, url))

            method = web_hook.get('method', 'POST')
            if not isinstance(method, str):
                raise PolyaxonActionException(
                    '{} received invalid method `{}`.'.format(cls.name, method))

            normalized_method = method.upper()
            if normalized_method not in allowed_methods:
                raise PolyaxonActionException(
                    '{} received non compatible method `{}`.'.format(cls.name, method))

            normalized_hook = {'url': url, 'method': normalized_method}
            # Copy over the requested extra fields that this hook defines.
            normalized_hook.update(
                {field: web_hook[field] for field in fields if field in web_hook})
            valid_hooks.append(normalized_hook)

        return valid_hooks
Exemple #21
0
    def __init__(self, filepaths):
        """Read the given polyaxonfile(s) and build their specification.

        Args:
            filepaths: a file path or a list of file paths; each one must
                point to an existing file.

        Raises:
            PolyaxonfileError: if one of the paths is not a file.
        """
        filepaths = to_list(filepaths)

        # Every path must exist before anything is read.
        for path in filepaths:
            if os.path.isfile(path):
                continue
            raise PolyaxonfileError("`{}` must be a valid file".format(path))

        self._filenames = [os.path.basename(path) for path in filepaths]

        file_data = reader.read(filepaths)
        self.specification = get_specification(data=file_data)
Exemple #22
0
    def get_task_pod(self,
                     task_type,
                     task_idx,
                     volume_mounts,
                     volumes,
                     labels,
                     env_vars=None,
                     init_env_vars=None,
                     command=None,
                     args=None,
                     ports=None,
                     persistence_outputs=None,
                     persistence_data=None,
                     outputs_refs_jobs=None,
                     outputs_refs_experiments=None,
                     secret_refs=None,
                     configmap_refs=None,
                     resources=None,
                     ephemeral_token=None,
                     node_selector=None,
                     affinity=None,
                     tolerations=None,
                     sidecar_context_mounts=None,
                     init_context_mounts=None,
                     restart_policy=None):
        """Build the pod for one task of the given type and index.

        The resource name comes from `get_resource_name`, and an env var
        carrying the task type and index (as JSON) is added after the given
        `env_vars`. Everything else is forwarded unchanged to `get_pod`.

        Returns:
            Whatever `self.get_pod` returns.
        """
        resource_name = self.get_resource_name(task_type=task_type,
                                               task_idx=task_idx)

        # Tell the task which type/index it is through its environment.
        task_env = to_list(env_vars, check_none=True)
        task_info = json.dumps({'type': task_type, 'index': task_idx})
        task_env.append(
            client.V1EnvVar(name=constants.CONFIG_MAP_TASK_INFO_KEY_NAME,
                            value=task_info))

        pod_kwargs = dict(
            resource_name=resource_name,
            volume_mounts=volume_mounts,
            volumes=volumes,
            labels=labels,
            env_vars=task_env,
            command=command,
            args=args,
            init_env_vars=init_env_vars,
            ports=ports,
            persistence_outputs=persistence_outputs,
            persistence_data=persistence_data,
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments,
            secret_refs=secret_refs,
            configmap_refs=configmap_refs,
            resources=resources,
            ephemeral_token=ephemeral_token,
            node_selector=node_selector,
            affinity=affinity,
            tolerations=tolerations,
            sidecar_context_mounts=sidecar_context_mounts,
            init_context_mounts=init_context_mounts,
            restart_policy=restart_policy)
        return self.get_pod(**pod_kwargs)
Exemple #23
0
def validate_tags(tags):
    """Normalize `tags` into a list of non-empty string tags.

    A string is split on commas and every piece is stripped of surrounding
    whitespace; any other value is used as given. The result is passed
    through `to_list`, and entries that are falsy or not strings are dropped.
    Entries that arrive inside a non-string value are kept verbatim (they are
    not stripped).

    Returns `None` when `tags` is falsy.
    """
    if not tags:
        return None

    if isinstance(tags, six.string_types):
        candidates = [piece.strip() for piece in tags.split(",")]
    else:
        candidates = tags

    cleaned = []
    for candidate in to_list(candidates):
        if candidate and isinstance(candidate, six.string_types):
            cleaned.append(candidate)
    return cleaned
    def upload(self,
               url,
               files,
               files_size,
               params=None,
               json_data=None,
               timeout=None,
               headers=None,
               session=None):
        """Send `files` to `url` as a multipart-encoded PUT request.

        Args:
            url: target url, forwarded to `self.put`.
            files: multipart field(s); normalized with `to_list` and handed to
                `MultipartEncoder(fields=...)`. The caller's object is not modified.
            files_size: size of the upload, compared against
                `settings.WARN_UPLOAD_SIZE` and `settings.MAX_UPLOAD_SIZE`.
            params: forwarded to `self.put`.
            json_data: optional payload; when truthy it is serialized with
                `json.dumps` and added as an extra `json` field.
            timeout: request timeout; `settings.LONG_REQUEST_TIMEOUT` when `None`.
            headers: optional extra headers. They are copied into a plain dict,
                so the caller's mapping is not modified.
            session: forwarded to `self.put`.

        Returns:
            The value returned by `self.put`.

        Raises:
            PolyaxonShouldExitError: if `files_size` exceeds
                `settings.MAX_UPLOAD_SIZE`.
        """

        if files_size > settings.WARN_UPLOAD_SIZE:
            logger.warning(
                "You are uploading %s, there's a hard limit of %s.\n"
                "If you have data files in the current directory, "
                "please make sure to add them to .polyaxonignore or "
                "add them directly to your data volume, or upload them "
                "separately using `polyaxon data` command and remove them from here.\n",
                self.format_sizeof(settings.WARN_UPLOAD_SIZE),
                self.format_sizeof(settings.MAX_UPLOAD_SIZE))

        if files_size > settings.MAX_UPLOAD_SIZE:
            raise PolyaxonShouldExitError(
                "Files too large to sync, please keep it under {}.\n"
                "If you have data files in the current directory, "
                "please add them directly to your data volume, or upload them "
                "separately using `polyaxon data` command and remove them from here.\n"
                .format(self.format_sizeof(settings.MAX_UPLOAD_SIZE)))

        # Copy before appending so a list passed in by the caller is left untouched.
        files = list(to_list(files))
        if json_data:
            files.append(('json', json.dumps(json_data)))

        multipart_encoder = MultipartEncoder(fields=files)
        # Copy the headers: adding the Content-Type must not leak into the
        # caller's dict (which may be reused for other requests).
        request_headers = dict(headers or {})
        request_headers["Content-Type"] = multipart_encoder.content_type

        # Attach progress bar
        progress_callback, callback_bar = self.create_progress_callback(
            multipart_encoder)
        multipart_encoder_monitor = MultipartEncoderMonitor(
            multipart_encoder, progress_callback)

        if timeout is None:
            timeout = settings.LONG_REQUEST_TIMEOUT

        try:
            response = self.put(url=url,
                                params=params,
                                data=multipart_encoder_monitor,
                                headers=request_headers,
                                timeout=timeout,
                                session=session)
        finally:
            # always make sure we clear the console
            callback_bar.done()

        return response
Exemple #25
0
 def get_init_container(self,
                        init_command,
                        init_args,
                        env_vars,
                        context_mounts,
                        persistence_outputs,
                        persistence_data):
     """Build the pod init container for this job.

     The container args always come from `get_auth_context_args` for the
     'notebook' entity named `self.job_name`. The incoming `init_args`,
     `persistence_outputs` and `persistence_data` are accepted but not used
     in this implementation.

     Returns a single `client.V1Container`.
     """
     container_env = to_list(env_vars, check_none=True)
     mounts = to_list(context_mounts, check_none=True)
     # Fall back to a shell invocation when no explicit command is given.
     command = init_command if init_command else ["/bin/sh", "-c"]
     auth_args = get_auth_context_args(entity='notebook',
                                       entity_name=self.job_name)
     container_args = to_list(auth_args)
     container = client.V1Container(
         name=self.init_container_name,
         image=self.init_docker_image,
         image_pull_policy=self.init_docker_image_pull_policy,
         command=command,
         args=container_args,
         env=container_env,
         volume_mounts=mounts)
     return container
Exemple #26
0
 def __init__(self,
              namespace,
              name,
              project_name,
              project_uuid,
              job_name,
              job_uuid,
              job_docker_image,
              job_container_name=None,
              sidecar_container_name=None,
              sidecar_docker_image=None,
              sidecar_docker_image_pull_policy=None,
              init_container_name=None,
              init_docker_image=None,
              role_label=None,
              type_label=None,
              ports=None,
              use_sidecar=False,
              sidecar_config=None,
              health_check_url=None,
              log_level=None):
     """Store the job, sidecar and init container settings on the instance.

     Optional container names, images, pull policy and labels fall back to
     the corresponding `conf` entry when the argument is falsy
     (e.g. `job_container_name` -> `CONTAINER_NAME_JOB`).

     Args:
         ports: a port or list of ports; stored as a list (empty when falsy).
         use_sidecar: whether a sidecar is used; requires `sidecar_config`.
         sidecar_config: configuration for the sidecar.

     Raises:
         PolyaxonConfigurationError: if `use_sidecar` is set without a
             `sidecar_config`.
     """
     self.namespace = namespace
     self.name = name
     self.project_name = project_name
     self.project_uuid = project_uuid
     self.job_name = job_name
     self.job_uuid = job_uuid
     self.job_container_name = job_container_name or conf.get(
         'CONTAINER_NAME_JOB')
     self.job_docker_image = job_docker_image
     self.sidecar_container_name = sidecar_container_name or conf.get(
         'CONTAINER_NAME_SIDECAR')
     self.sidecar_docker_image = sidecar_docker_image or conf.get(
         'JOB_SIDECAR_DOCKER_IMAGE')
     self.sidecar_docker_image_pull_policy = (
         sidecar_docker_image_pull_policy
         or conf.get('JOB_SIDECAR_DOCKER_IMAGE_PULL_POLICY'))
     self.init_container_name = init_container_name or conf.get(
         'CONTAINER_NAME_INIT')
     self.init_docker_image = init_docker_image or conf.get(
         'JOB_INIT_DOCKER_IMAGE')
     self.role_label = role_label or conf.get('ROLE_LABELS_WORKER')
     self.type_label = type_label or conf.get('TYPE_LABELS_RUNNER')
     self.app_label = conf.get('APP_LABELS_JOB')
     # These helpers run only after the attributes above are set; keep the
     # order, since they may read them.
     self.labels = self.get_labels()
     self.k8s_job_name = self.get_k8s_job_name()
     self.ports = to_list(ports) if ports else []
     self.use_sidecar = use_sidecar
     if use_sidecar and not sidecar_config:
         raise PolyaxonConfigurationError(
             'In order to use a sidecar, a `sidecar_config` is required. '
             'The `sidecar_config` must correspond to the sidecar docker image used.'
         )
     self.sidecar_config = sidecar_config
     self.health_check_url = health_check_url
     self.log_level = log_level
Exemple #27
0
    def get_init_container(self,
                           init_command,
                           init_args,
                           env_vars,
                           context_mounts,
                           persistence_outputs,
                           persistence_data):
        """Build the pod init container(s) that prepare the outputs path.

        Args:
            init_command: container command; defaults to `["/bin/sh", "-c"]`.
            init_args: optional list of argument fragments. When falsy, the
                fragments come from `get_output_args`. The list passed in is
                not modified.
            env_vars: environment variables for the container.
            context_mounts: extra volume mounts appended to the outputs mount.
            persistence_outputs: used to resolve the outputs paths and volume.
            persistence_data: not used in this implementation.

        Returns:
            An empty list when this experiment resumes an original one,
            otherwise a list holding one `client.V1Container`.
        """
        env_vars = to_list(env_vars, check_none=True)
        is_clone = self.original_name is not None
        if is_clone and self.cloning_strategy == CloningStrategy.RESUME:
            # No init container is created when resuming.
            return []
        if is_clone and self.cloning_strategy == CloningStrategy.COPY:
            command = InitCommands.COPY
            original_outputs_path = stores.get_experiment_outputs_path(
                persistence=persistence_outputs,
                experiment_name=self.original_name)
        else:
            command = InitCommands.CREATE
            original_outputs_path = None

        outputs_path = stores.get_experiment_outputs_path(
            persistence=persistence_outputs,
            experiment_name=self.experiment_name)
        _, outputs_volume_mount = get_pod_outputs_volume(persistence_outputs=persistence_outputs)
        volume_mounts = outputs_volume_mount + to_list(context_mounts, check_none=True)
        init_command = init_command or ["/bin/sh", "-c"]

        if init_args:
            base_args = init_args
        else:
            base_args = to_list(
                get_output_args(command=command,
                                outputs_path=outputs_path,
                                original_outputs_path=original_outputs_path))
        # Build a fresh list: extending `init_args` in place would leak the
        # auth-context fragments into the caller's list.
        container_args = list(base_args)
        container_args.extend(
            to_list(get_auth_context_args(entity='experiment',
                                          entity_name=self.experiment_name)))
        return [
            client.V1Container(
                name=self.init_container_name,
                image=self.init_docker_image,
                image_pull_policy=self.init_docker_image_pull_policy,
                command=init_command,
                # The fragments are concatenated into one shell argument.
                args=[''.join(container_args)],
                env=env_vars,
                resources=get_init_resources(),
                volume_mounts=volume_mounts)
        ]
Exemple #28
0
 def run(cls, task_bind, *args, **kwargs):
     """Run `cls._run`, retrying through `task_bind` on retryable errors.

     The retryable exceptions are those in `cls.retry_for` plus
     `SoftTimeLimitExceeded`. While `task_bind.request.retries` is below
     `task_bind.max_retries`, the result of `task_bind.retry(...)` is
     raised; otherwise the original exception is re-raised.

     Returns:
         Whatever `cls._run` returns.
     """
     # Copy so the append below never mutates the class-level `retry_for`
     # (`to_list` may hand back the very same list object).
     retry_for = list(to_list(cls.retry_for or []))
     if SoftTimeLimitExceeded not in retry_for:
         retry_for.append(SoftTimeLimitExceeded)
     try:
         return cls._run(task_bind, *args, **kwargs)
     except tuple(retry_for):  # pylint:disable=catching-non-exception
         if task_bind.request.retries < task_bind.max_retries:
             raise task_bind.retry(countdown=task_bind.countdown)
         # Retries exhausted: re-raise the active exception unchanged.
         raise
Exemple #29
0
    def __init__(self,
                 repo_path: str,
                 from_image: str,
                 copy_code: bool = True,
                 build_steps: Optional[List[str]] = None,
                 env_vars: Optional[List[Tuple[str, str]]] = None,
                 nvidia_bin: str = None,
                 dockerfile_name: str = 'Dockerfile') -> None:
        """Record the build settings and derive the build paths.

        `repo_path` is cut at its last '/': the part after it becomes
        `folder_name`, the part before it becomes `build_path`. The
        dockerfile path is `dockerfile_name` joined onto `build_path`.
        `build_steps` and `env_vars` are normalized to lists (`None` -> `[]`).
        """
        # Equivalent to split('/')[-1] and '/'.join(split('/')[:-1]).
        parent_dir, _, leaf_name = repo_path.rpartition('/')

        self.from_image = from_image
        self.folder_name = leaf_name
        self.repo_path = repo_path
        self.copy_code = copy_code

        self.build_path = parent_dir
        self.build_steps = to_list(build_steps, check_none=True)
        self.env_vars = to_list(env_vars, check_none=True)
        self.nvidia_bin = nvidia_bin
        self.dockerfile_path = os.path.join(parent_dir, dockerfile_name)
        # Resolved last, after the path attributes above are in place.
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()
        self.is_pushing = False
Exemple #30
0
    def publish_job_log(self, log_lines, job_uuid, job_name, send_task=True):
        """Publish log lines for a job.

        `log_lines` is normalized with `to_list`. When `send_task` is truthy,
        a `LogsCeleryTasks.LOGS_HANDLE_JOB` task is sent with the job uuid,
        job name and the lines; the lines are then streamed through
        `self._stream_job_log` regardless of `send_task`.
        """
        log_lines = to_list(log_lines)

        self._logger.info("Publishing log event for task: %s", job_uuid)
        if send_task:
            task_kwargs = {
                'job_uuid': job_uuid,
                'job_name': job_name,
                'log_lines': log_lines,
            }
            celery_app.send_task(LogsCeleryTasks.LOGS_HANDLE_JOB,
                                 kwargs=task_kwargs)
        self._stream_job_log(
            routing_key=RoutingKeys.STREAM_LOGS_SIDECARS_JOBS,
            job_uuid=job_uuid,
            log_lines=log_lines)