예제 #1
0
파일: utils.py 프로젝트: DuneSys/mlrun
def resolve_mpijob_crd_version(api_context=False):
    """Return the MPIJob CRD version to use, resolving and caching it on first call.

    The result is stored in the module-level ``cached_mpijob_crd_version`` and
    returned as-is on later calls. Resolution order on a cache miss:

    1. ``config.mpijob_crd_version`` when it is set.
    2. When running inside a Kubernetes cluster: the ``crd-version`` label of
       the first pod matching the ``component=mpi-operator`` selector in the
       resolved namespace.
    3. When outside a cluster and ``api_context`` is false: connect to the run
       DB and re-read ``config.mpijob_crd_version``.
    4. ``MPIJobCRDVersions.default()`` if none of the above produced a value.

    :param api_context: when true, the run DB connection fallback (step 3)
                        is skipped.
    :return: the resolved (and now cached) CRD version.
    :raises ValueError: if the resolved version is not one of
                        ``MPIJobCRDVersions.all()``.
    """
    global cached_mpijob_crd_version

    # a previous call already resolved the version
    if cached_mpijob_crd_version:
        return cached_mpijob_crd_version

    # an explicit config value wins over any kind of detection
    crd_version = config.mpijob_crd_version

    if not crd_version:
        if get_k8s_helper(silent=True).is_running_inside_kubernetes_cluster():
            helper = get_k8s_helper()

            # look for a running mpi-operator and read the version off its labels
            operator_pods = helper.list_pods(
                namespace=helper.resolve_namespace(),
                selector='component=mpi-operator',
            )
            if len(operator_pods) > 0:
                crd_version = operator_pods[0].metadata.labels.get('crd-version')
        elif not api_context:
            # connect will populate the config from the server config
            # TODO: something nicer
            get_run_db().connect()
            crd_version = config.mpijob_crd_version

        # nothing could be detected - fall back to the default version
        crd_version = crd_version or MPIJobCRDVersions.default()

    # the check also covers a value that came straight from the config
    if crd_version not in MPIJobCRDVersions.all():
        raise ValueError(f'unsupported mpijob crd version: {crd_version}. '
                         f'supported versions: {MPIJobCRDVersions.all()}')

    cached_mpijob_crd_version = crd_version
    return cached_mpijob_crd_version
예제 #2
0
    def deploy_default_image(cls, with_gpu=False):
        """Deploy the default image for this runtime kind using a temporary function.

        A function named ``spark-default-image-deploy-temp`` is created for
        ``cls.kind``, its build image is set to the name returned by
        ``cls._get_default_deployed_mlrun_image_name(with_gpu)``, and it is
        deployed. Afterwards the function is deleted from the run DB.

        :param with_gpu: when true, one GPU is requested for both the executor
                         and the driver; otherwise no GPU is requested.
        """
        from mlrun.run import new_function

        temp_function = new_function(
            kind=cls.kind, name="spark-default-image-deploy-temp"
        )
        temp_function.spec.build.image = cls._get_default_deployed_mlrun_image_name(
            with_gpu
        )

        # executor and driver get the exact same resource requests
        resource_requests = {
            "cpu": 1,
            "mem": "512m",
            "gpus": 1 if with_gpu else None,
        }
        temp_function.with_executor_requests(**resource_requests)
        temp_function.with_driver_requests(**resource_requests)

        temp_function.deploy()

        # the function only existed to trigger the deploy - remove it again
        get_run_db().delete_function(name=temp_function.metadata.name)
예제 #3
0
 def deploy(self, watch=True, with_mlrun=True, skip_deployed=False, is_kfp=False):
     """Deploy the function, building a container with its dependencies.

     If no base image is set on the build spec, ``self._default_image`` is
     used. All arguments are forwarded unchanged to the parent class'
     ``deploy`` and its result is returned.
     """
     # Called only for its side effect; the original note says connecting
     # populates the config from the server config.
     # NOTE(review): only get_run_db() is invoked here, no explicit
     # connect() - confirm that get_run_db() connects by itself.
     get_run_db()

     build_spec = self.spec.build
     if not build_spec.base_image:
         build_spec.base_image = self._default_image

     deploy_options = dict(
         watch=watch,
         with_mlrun=with_mlrun,
         skip_deployed=skip_deployed,
         is_kfp=is_kfp,
     )
     return super().deploy(**deploy_options)