Ejemplo n.º 1
0
 def _assert_runtime_handler_list_resources(
     runtime_kind,
     expected_crds=None,
     expected_pods=None,
     expected_services=None,
 ):
     """Run ``list_resources`` on a runtime handler and verify the outcome.

     Asserts that the k8s client was queried with the handler's default label
     selector in the resolved namespace, then validates the returned resources
     through ``TestRuntimeHandlerBase._assert_list_resources_response``.

     :param runtime_kind: kind handed to ``get_runtime_handler``.
     :param expected_crds: expected CRDs; the custom-object listing call is
         only asserted when this is truthy.
     :param expected_pods: expected pods, forwarded to the response check.
     :param expected_services: expected services; the service listing call is
         only asserted when this is truthy.
     """
     handler = get_runtime_handler(runtime_kind)
     listed = handler.list_resources()
     group, version, plural = handler._get_crd_info()

     k8s = get_k8s()
     namespace = k8s.resolve_namespace()
     selector = handler._get_default_label_selector()

     # the pod listing is asserted unconditionally
     k8s.v1api.list_namespaced_pod.assert_called_once_with(
         namespace, label_selector=selector
     )
     if expected_crds:
         k8s.crdapi.list_namespaced_custom_object.assert_called_once_with(
             group, version, namespace, plural, label_selector=selector
         )
     if expected_services:
         k8s.v1api.list_namespaced_service.assert_called_once_with(
             namespace, label_selector=selector
         )

     TestRuntimeHandlerBase._assert_list_resources_response(
         listed,
         expected_crds=expected_crds,
         expected_pods=expected_pods,
         expected_services=expected_services,
     )
Ejemplo n.º 2
0
    def custom_setup(self):
        """Create the Spark runtime handler and the fixtures used by the tests.

        Stores on ``self``: sparkjob CRD dicts built with the running,
        completed and failed statuses, an executor pod and a driver pod (both
        in the running phase), and the label selector returned by
        ``_generate_get_logger_pods_label_selector``.
        """
        self.runtime_handler = get_runtime_handler(RuntimeKinds.spark)

        # built once here so the individual tests stay short
        project, uid = self.project, self.run_uid
        make_crd = self._generate_sparkjob_crd
        self.running_crd_dict = make_crd(project, uid, self._get_running_crd_status())
        self.completed_crd_dict = make_crd(
            project, uid, self._get_completed_crd_status()
        )
        self.failed_crd_dict = make_crd(project, uid, self._get_failed_crd_status())

        # labels common to both pods; the role-specific ones are inserted
        # between these two groups
        leading_labels = {
            "mlrun/class": "spark",
            "mlrun/function": "my-spark-jdbc",
            "mlrun/job": "my-spark-jdbc-2ea432f1",
            "mlrun/name": "my-spark-jdbc",
            "mlrun/project": project,
            "mlrun/uid": uid,
            "mlrun/scrape_metrics": "False",
            "mlrun/tag": "latest",
            "spark-app-selector": "spark-12f88a73cb544ce298deba34947226a4",
        }
        operator_labels = {
            "sparkoperator.k8s.io/app-name": "my-spark-jdbc-2ea432f1",
            "sparkoperator.k8s.io/launched-by-spark-operator": "true",
            "sparkoperator.k8s.io/submission-id": "44343f6b-42ca-41d4-b01a-66052cc5c919",
        }

        self.executor_pod = self._generate_pod(
            "my-spark-jdbc-2ea432f1-1597760338437-exec-1",
            {
                **leading_labels,
                "spark-exec-id": "1",
                "spark-role": "executor",
                **operator_labels,
            },
            PodPhases.running,
        )

        self.driver_pod = self._generate_pod(
            "my-spark-jdbc-2ea432f1-driver",
            {**leading_labels, "spark-role": "driver", **operator_labels},
            PodPhases.running,
        )

        self.pod_label_selector = self._generate_get_logger_pods_label_selector(
            self.runtime_handler
        )
Ejemplo n.º 3
0
def list_runtimes(label_selector: str = None):
    """List the resources of every runtime kind that has a handler.

    :param label_selector: passed unchanged to each handler's
        ``list_resources``.
    :return: one ``{"kind": ..., "resources": ...}`` dict per kind, in the
        order produced by ``RuntimeKinds.runtime_with_handlers()``.
    """
    return [
        {
            "kind": runtime_kind,
            "resources": get_runtime_handler(runtime_kind).list_resources(
                label_selector
            ),
        }
        for runtime_kind in RuntimeKinds.runtime_with_handlers()
    ]
Ejemplo n.º 4
0
    def custom_setup(self):
        """Create the MPIJob (v1 CRD) runtime handler and its test fixtures.

        Sets ``config.mpijob_crd_version`` to v1, zeroes the handler's
        ``wait_for_deletion_interval`` and stores on ``self``: mpijob CRD
        dicts built with the active, succeeded and failed statuses plus one
        built without a status, a launcher pod and a worker pod (both in the
        running phase), and the label selector returned by
        ``_generate_get_logger_pods_label_selector``.
        """
        config.mpijob_crd_version = MPIJobCRDVersions.v1
        self.runtime_handler = get_runtime_handler(RuntimeKinds.mpijob)
        self.runtime_handler.wait_for_deletion_interval = 0

        # built once here so the individual tests stay short
        project, uid = self.project, self.run_uid
        make_crd = self._generate_mpijob_crd
        self.active_crd_dict = make_crd(project, uid, self._get_active_crd_status())
        self.succeeded_crd_dict = make_crd(
            project, uid, self._get_succeeded_crd_status()
        )
        self.failed_crd_dict = make_crd(project, uid, self._get_failed_crd_status())
        self.no_status_crd_dict = make_crd(project, uid)

        # launcher and worker pods differ only in their "mpi-job-role" label
        shared_pod_labels = {
            "group-name": "kubeflow.org",
            "mlrun/class": "mpijob",
            "mlrun/function": "trainer",
            "mlrun/job": "trainer-1b019005",
            "mlrun/name": "trainer",
            "mlrun/owner": "iguazio",
            "mlrun/project": project,
            "mlrun/scrape-metrics": "True",
            "mlrun/tag": "latest",
            "mlrun/uid": uid,
            "mpi-job-name": "trainer-1b019005",
        }

        self.launcher_pod = self._generate_pod(
            "trainer-1b019005-launcher",
            {**shared_pod_labels, "mpi-job-role": "launcher"},
            PodPhases.running,
        )

        self.worker_pod = self._generate_pod(
            "trainer-1b019005-worker-0",
            {**shared_pod_labels, "mpi-job-role": "worker"},
            PodPhases.running,
        )

        self.pod_label_selector = self._generate_get_logger_pods_label_selector(
            self.runtime_handler
        )
Ejemplo n.º 5
0
def delete_runtimes(
    label_selector: str = None,
    force: bool = False,
    db_session: Session = Depends(deps.get_db_session),
):
    """Delete the resources of every runtime kind that has a handler.

    :param label_selector: forwarded to each handler's ``delete_resources``.
    :param force: forwarded to each handler's ``delete_resources``.
    :param db_session: DB session supplied through ``deps.get_db_session``.
    :return: an empty response with status 204 (No Content).
    """
    for runtime_kind in RuntimeKinds.runtime_with_handlers():
        handler = get_runtime_handler(runtime_kind)
        handler.delete_resources(get_db(), db_session, label_selector, force)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
Ejemplo n.º 6
0
def _cleanup_runtimes():
    """Delete the resources of every runtime kind using a dedicated session.

    The session is closed even when a handler raises; such an exception is
    not caught here and propagates to the caller.
    """
    logger.debug('Cleaning runtimes')
    session = create_session()
    try:
        for runtime_kind in RuntimeKinds.runtime_with_handlers():
            get_runtime_handler(runtime_kind).delete_resources(get_db(), session)
    finally:
        close_session(session)
Ejemplo n.º 7
0
def delete_runtimes(
    label_selector: str = None,
    force: bool = False,
    grace_period: int = config.runtime_resources_deletion_grace_period,
    db_session: Session = Depends(deps.get_db_session),
):
    """Delete the resources of every runtime kind that has a handler.

    :param label_selector: forwarded to each handler's ``delete_resources``.
    :param force: forwarded to each handler's ``delete_resources``.
    :param grace_period: forwarded to each handler's ``delete_resources``;
        defaults to ``config.runtime_resources_deletion_grace_period``.
    :param db_session: DB session supplied through ``deps.get_db_session``.
    :return: an empty response with status 204 (No Content).
    """
    for runtime_kind in RuntimeKinds.runtime_with_handlers():
        handler = get_runtime_handler(runtime_kind)
        handler.delete_resources(
            get_db(), db_session, label_selector, force, grace_period
        )
    return Response(status_code=HTTPStatus.NO_CONTENT.value)
Ejemplo n.º 8
0
def get_runtime(kind: str, label_selector: str = None):
    """Return the resources of a single runtime kind.

    :param kind: runtime kind; must be one of
        ``RuntimeKinds.runtime_with_handlers()``.
    :param label_selector: passed unchanged to the handler's
        ``list_resources``.
    :return: ``{"kind": kind, "resources": <listed resources>}``.
    """
    supported_kinds = RuntimeKinds.runtime_with_handlers()
    if kind not in supported_kinds:
        # log_and_raise is expected to raise (400) and stop here - TODO confirm
        log_and_raise(
            HTTPStatus.BAD_REQUEST.value, kind=kind, err="Invalid runtime kind"
        )
    handler = get_runtime_handler(kind)
    return {"kind": kind, "resources": handler.list_resources(label_selector)}
Ejemplo n.º 9
0
def get_runtime(kind: str, label_selector: str = None):
    """Return the resources of a single runtime kind.

    :param kind: runtime kind; must be one of
        ``RuntimeKinds.runtime_with_handlers()``.
    :param label_selector: passed unchanged to the handler's
        ``list_resources``.
    :return: ``{'kind': kind, 'resources': <listed resources>}``.
    """
    supported_kinds = RuntimeKinds.runtime_with_handlers()
    if kind not in supported_kinds:
        # log_and_raise is expected to raise (400) and stop here - TODO confirm
        log_and_raise(
            status.HTTP_400_BAD_REQUEST, kind=kind, err='Invalid runtime kind'
        )
    handler = get_runtime_handler(kind)
    return {'kind': kind, 'resources': handler.list_resources(label_selector)}
Ejemplo n.º 10
0
def delete_runtime(
    kind: str,
    label_selector: str = None,
    force: bool = False,
    db_session: Session = Depends(deps.get_db_session),
):
    """Delete the resources of a single runtime kind.

    :param kind: runtime kind; must be one of
        ``RuntimeKinds.runtime_with_handlers()``.
    :param label_selector: forwarded to the handler's ``delete_resources``.
    :param force: forwarded to the handler's ``delete_resources``.
    :param db_session: DB session supplied through ``deps.get_db_session``.
    :return: an empty response with status 204 (No Content).
    """
    supported_kinds = RuntimeKinds.runtime_with_handlers()
    if kind not in supported_kinds:
        # log_and_raise is expected to raise (400) and stop here - TODO confirm
        log_and_raise(
            status.HTTP_400_BAD_REQUEST, kind=kind, err='Invalid runtime kind'
        )
    handler = get_runtime_handler(kind)
    handler.delete_resources(get_db(), db_session, label_selector, force)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
Ejemplo n.º 11
0
def _cleanup_runtimes():
    """Delete the resources of every runtime kind, one kind at a time.

    A failure for one kind is logged as a warning and skipped so the
    remaining kinds are still processed. The session is always closed.
    """
    session = create_session()
    try:
        for runtime_kind in RuntimeKinds.runtime_with_handlers():
            try:
                handler = get_runtime_handler(runtime_kind)
                handler.delete_resources(get_db(), session)
            except Exception as error:
                # best effort: keep going with the next kind
                logger.warning(
                    "Failed deleting resources. Ignoring",
                    exc=str(error),
                    kind=runtime_kind,
                )
    finally:
        close_session(session)
Ejemplo n.º 12
0
def _monitor_runs():
    """Invoke ``monitor_runs`` on the handler of every runtime kind.

    A failure for one kind is logged as a warning and skipped so the
    remaining kinds are still processed. The session is always closed.
    """
    session = create_session()
    try:
        for runtime_kind in RuntimeKinds.runtime_with_handlers():
            try:
                handler = get_runtime_handler(runtime_kind)
                handler.monitor_runs(get_db(), session)
            except Exception as error:
                # best effort: keep going with the next kind
                logger.warning(
                    "Failed monitoring runs. Ignoring",
                    exc=str(error),
                    kind=runtime_kind,
                )
    finally:
        close_session(session)
Ejemplo n.º 13
0
 def _assert_runtime_handler_list_resources(
     self,
     runtime_kind,
     expected_crds=None,
     expected_pods=None,
     expected_services=None,
     group_by: Optional[
         mlrun.api.schemas.ListRuntimeResourcesGroupByField] = None,
 ):
     """Run ``list_resources`` on a runtime handler and verify the outcome.

     Without ``group_by`` the listing is requested for project ``"*"`` and
     the k8s calls are expected to carry the handler's default label
     selector. When grouping by job, the listing is requested for
     ``self.project`` and the expected selector additionally contains
     ``mlrun/project=<self.project>``.

     :param runtime_kind: kind handed to ``get_runtime_handler``.
     :param expected_crds: expected CRDs; the custom-object listing call is
         only asserted when this is truthy.
     :param expected_pods: expected pods, forwarded to the response check.
     :param expected_services: expected services; the service listing call is
         only asserted when this is truthy.
     :param group_by: ``None`` or the ``job`` group-by field.
     :raises NotImplementedError: for any other ``group_by`` value.
     """
     handler = get_runtime_handler(runtime_kind)

     grouped_by_job = (
         group_by is not None
         and group_by == mlrun.api.schemas.ListRuntimeResourcesGroupByField.job
     )
     if group_by is not None and not grouped_by_job:
         raise NotImplementedError("Unsupported group by value")

     selector = handler._get_default_label_selector()
     if grouped_by_job:
         project = self.project
         selector = ",".join([selector, f"mlrun/project={self.project}"])
         check_response = (
             TestRuntimeHandlerBase._assert_list_resources_grouped_response
         )
     else:
         project = "*"
         check_response = TestRuntimeHandlerBase._assert_list_resources_response

     listed = handler.list_resources(project, group_by=group_by)
     group, version, plural = handler._get_crd_info()

     k8s = get_k8s()
     namespace = k8s.resolve_namespace()

     # the pod listing is asserted unconditionally
     k8s.v1api.list_namespaced_pod.assert_called_once_with(
         namespace, label_selector=selector
     )
     if expected_crds:
         k8s.crdapi.list_namespaced_custom_object.assert_called_once_with(
             group, version, namespace, plural, label_selector=selector
         )
     if expected_services:
         k8s.v1api.list_namespaced_service.assert_called_once_with(
             namespace, label_selector=selector
         )

     check_response(
         listed,
         expected_crds=expected_crds,
         expected_pods=expected_pods,
         expected_services=expected_services,
     )
Ejemplo n.º 14
0
def delete_runtime(
    kind: str,
    label_selector: str = None,
    force: bool = False,
    grace_period: int = config.runtime_resources_deletion_grace_period,
    db_session: Session = Depends(deps.get_db_session),
):
    """Delete the resources of a single runtime kind.

    :param kind: runtime kind; must be one of
        ``RuntimeKinds.runtime_with_handlers()``.
    :param label_selector: forwarded to the handler's ``delete_resources``.
    :param force: forwarded to the handler's ``delete_resources``.
    :param grace_period: forwarded to the handler's ``delete_resources``;
        defaults to ``config.runtime_resources_deletion_grace_period``.
    :param db_session: DB session supplied through ``deps.get_db_session``.
    :return: an empty response with status 204 (No Content).
    """
    supported_kinds = RuntimeKinds.runtime_with_handlers()
    if kind not in supported_kinds:
        # log_and_raise is expected to raise (400) and stop here - TODO confirm
        log_and_raise(
            HTTPStatus.BAD_REQUEST.value, kind=kind, err="Invalid runtime kind"
        )
    handler = get_runtime_handler(kind)
    handler.delete_resources(
        get_db(), db_session, label_selector, force, grace_period
    )
    return Response(status_code=HTTPStatus.NO_CONTENT.value)
Ejemplo n.º 15
0
    def custom_setup(self):
        """Create the MPIJob (v1 CRD) runtime handler and its CRD fixtures.

        Sets ``config.mpijob_crd_version`` to v1 and stores on ``self`` the
        mpijob CRD dicts built with the active, succeeded and failed
        statuses, then mocks the namespaced pod listing (see comment below).
        """
        config.mpijob_crd_version = MPIJobCRDVersions.v1
        self.runtime_handler = get_runtime_handler(RuntimeKinds.mpijob)

        # built once here so the individual tests stay short
        project, uid = self.project, self.run_uid
        make_crd = self._generate_mpijob_crd
        self.active_crd_dict = make_crd(project, uid, self._get_active_crd_status())
        self.succeeded_crd_dict = make_crd(
            project, uid, self._get_succeeded_crd_status()
        )
        self.failed_crd_dict = make_crd(project, uid, self._get_failed_crd_status())

        # There is currently a bug (the fix was merged but not yet released:
        # https://github.com/kubeflow/mpi-operator/pull/271) that causes the
        # mpijob's pods not to be labeled with the given (MLRun's) labels.
        # This prevents list resources from finding the pods, so the same
        # situation is simulated here.
        self._mock_list_namespaced_pods([[]])
Ejemplo n.º 16
0
    def custom_setup(self):
        """Create the job runtime handler and one pod fixture per phase.

        Stores on ``self`` a pending, running, completed (succeeded phase)
        and failed pod. All four are generated with the same pod name and the
        same labels dict.
        """
        self.runtime_handler = get_runtime_handler(RuntimeKinds.job)

        pod_labels = {
            "mlrun/class": self._get_class_name(),
            "mlrun/function": "my-trainer",
            "mlrun/name": "my-training",
            "mlrun/project": self.project,
            "mlrun/scrape_metrics": "False",
            "mlrun/tag": "latest",
            "mlrun/uid": self.run_uid,
        }

        def pod_in_phase(phase):
            # same name and same labels object for every phase
            return self._generate_pod("my-training-j7dtf", pod_labels, phase)

        # built once here so the individual tests stay short
        self.pending_pod = pod_in_phase(PodPhases.pending)
        self.running_pod = pod_in_phase(PodPhases.running)
        self.completed_pod = pod_in_phase(PodPhases.succeeded)
        self.failed_pod = pod_in_phase(PodPhases.failed)
Ejemplo n.º 17
0
    def custom_setup(self):
        """Create the Dask runtime handler and its pod/service fixtures.

        Stores on ``self`` a scheduler pod and a worker pod, each in both the
        running and the succeeded phase, and the cluster service. The service
        carries the same label values as the scheduler pod.
        """
        self.runtime_handler = get_runtime_handler(RuntimeKinds.dask)

        def labels_for(component):
            # fresh dict on every call; only "dask.org/component" varies
            return {
                "app": "dask",
                "dask.org/cluster-name": "mlrun-mydask-d7656bc1-0",
                "dask.org/component": component,
                "mlrun/class": "dask",
                "mlrun/function": "mydask",
                "mlrun/project": "default",
                "mlrun/scrape_metrics": "False",
                "mlrun/tag": "latest",
                "user": "******",
            }

        # built once here so the individual tests stay short
        scheduler_name = "mlrun-mydask-d7656bc1-0n4z9z"
        scheduler_labels = labels_for("scheduler")
        self.running_scheduler_pod = self._generate_pod(
            scheduler_name, scheduler_labels, PodPhases.running
        )
        self.completed_scheduler_pod = self._generate_pod(
            scheduler_name, scheduler_labels, PodPhases.succeeded
        )

        worker_name = "mlrun-mydask-d7656bc1-0pqbnc"
        worker_labels = labels_for("worker")
        self.running_worker_pod = self._generate_pod(
            worker_name, worker_labels, PodPhases.running
        )
        self.completed_worker_pod = self._generate_pod(
            worker_name, worker_labels, PodPhases.succeeded
        )

        self.cluster_service = self._generate_service(
            "mlrun-mydask-d7656bc1-0", labels_for("scheduler")
        )