def check_statefulsets_ready(record_xml_attribute, namespace, name,
                             stateful_sets):
    """Test that the given stateful sets are successfully deployed.

    Args:
      record_xml_attribute: Value forwarded to util.set_pytest_junit together
        with name.
      namespace: The namespace to check.
      name: Test name forwarded to util.set_pytest_junit.
      stateful_sets: Names of the stateful sets to wait for in namespace.

    Raises:
      Exception: If waiting for one of the stateful sets fails. The original
        error is attached as the cause.
    """
    set_logging()
    # TODO(jlewi): Should we do this in the calling function)?
    util.set_pytest_junit(record_xml_attribute, name)

    # Need to activate account for scopes.
    if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
        util.run(["gcloud", "auth", "activate-service-account",
                  "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]])

    api_client = deploy_utils.create_k8s_client()

    util.load_kube_config()

    for set_name in stateful_sets:
        logging.info("Verifying that stateful set %s.%s started...",
                     namespace, set_name)
        try:
            util.wait_for_statefulset(api_client, namespace, set_name)
        except Exception as err:
            # Collect debug information by running describe
            util.run(["kubectl", "-n", namespace, "describe", "statefulsets",
                      set_name])
            # Report the stateful set that actually failed (set_name), not the
            # test name, and keep the underlying error as the cause.
            raise Exception(
                f"Stateful set {namespace}.{set_name} is not ready") from err
def test_katib_is_ready(record_xml_attribute, namespace):
    """Check that the Katib deployments have started.

    Args:
      record_xml_attribute: Value forwarded to util.set_pytest_junit.
      namespace: The namespace Kubeflow is deployed to.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_katib_is_ready")

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        activate_cmd = ["gcloud", "auth", "activate-service-account"]
        activate_cmd.append("--key-file=" + key_file)
        util.run(activate_cmd)

    k8s_client = deploy_utils.create_k8s_client()

    util.load_kube_config()

    katib_deployments = (
        "katib-controller",
        "katib-mysql",
        "katib-db-manager",
        "katib-ui",
    )
    for katib_deployment in katib_deployments:
        logging.info("Verifying that deployment %s started...",
                     katib_deployment)
        util.wait_for_deployment(k8s_client, namespace, katib_deployment, 10)
def test_gcp_kf_admin_wi(record_xml_attribute, namespace, app_name, platform,
                         project):
    """Test that the kubeflow admin SA has proper workload identity binding.

    Fetches the IAM policy of the `<app_name>-admin` GCP service account and
    asserts that the expected Kubernetes service accounts are members of
    roles/iam.workloadIdentityUser. Skipped when platform is not "gcp".

    Args:
      record_xml_attribute: Value forwarded to util.set_pytest_junit.
      namespace: The namespace Kubeflow is deployed to.
      app_name: Name of the Kubeflow app; used to build the admin service
        account name.
      platform: Platform identifier; only "gcp" is tested.
      project: GCP project that owns the service account.

    Raises:
      Exception: If roles/iam.workloadIdentityUser is absent from the policy.
      AssertionError: If an expected workload identity member is missing.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_gcp_kf_admin_wi")

    # Need to activate account for scopes.
    if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
        util.run(["gcloud", "auth", "activate-service-account",
                  "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]])

    # The client is not used below; the call is kept in case creating it has
    # side effects that this test relies on (cannot be confirmed from here).
    deploy_utils.create_k8s_client()

    if platform != "gcp":
        # pytest.skip raises, so nothing after this call runs.
        pytest.skip("Not running on GCP")

    cred = GoogleCredentials.get_application_default()
    # Create the Cloud IAM service object
    service = googleapiclient.discovery.build('iam', 'v1', credentials=cred)

    admin_gcp_sa = ('projects/%s/serviceAccounts/'
                    '%s-admin@%s.iam.gserviceaccount.com') % (
                        project, app_name, project)

    request = service.projects().serviceAccounts().getIamPolicy(
        resource=admin_gcp_sa)
    response = request.execute()

    # A policy without any bindings may omit the 'bindings' key entirely, and
    # the same role can appear in more than one binding; merge the members so
    # neither case hides a role or a member.
    role_to_members = {}
    for binding in response.get('bindings', []):
        role_to_members.setdefault(binding['role'], set()).update(
            binding['members'])

    workload_identity_role = 'roles/iam.workloadIdentityUser'
    if workload_identity_role not in role_to_members:
        raise Exception("roles/iam.workloadIdentityUser missing in iam-policy of "
                        "service account %s" % admin_gcp_sa)

    account_str = "{project}.svc.id.goog[{namespace}/{account}]"
    # Expected workload identity users of the admin service account
    expected_wi_sa = [
        (namespace, "kf-admin"),
        (namespace, "profiles-controller-service-account"),
        ("istio-system", "kf-admin"),
    ]
    for sa_namespace, sa_account in expected_wi_sa:
        gcp_sa = account_str.format(project=project, namespace=sa_namespace,
                                    account=sa_account)
        error_message = ("GCP SA {0} missing workload identity binding for "
                         "{1}").format(admin_gcp_sa, gcp_sa)
        binding = "serviceAccount:" + gcp_sa
        assert binding in role_to_members[workload_identity_role], error_message
def test_gcp_access(record_xml_attribute, namespace, app_path, project):
    """Test that Kubeflow gcp was configured with workload_identity and GCP
    service account credentials.

    Does nothing beyond setup when the platform derived from app_path is not
    "gcp".

    Args:
      record_xml_attribute: Value forwarded to util.set_pytest_junit.
      namespace: The namespace Kubeflow is deployed to.
      app_path: Passed to get_platform_app_name to obtain the platform and
        app name.
      project: GCP project that owns the service accounts.

    Raises:
      Exception: If roles/owner or roles/iam.workloadIdentityUser is missing
        from the user service account's IAM policy, or if the admin service
        account is not an owner of it.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_gcp_access")

    # Need to activate account for scopes.
    if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
        util.run([
            "gcloud", "auth", "activate-service-account",
            "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
        ])

    api_client = deploy_utils.create_k8s_client()

    platform, app_name = get_platform_app_name(app_path)
    if platform != "gcp":
        return

    # check secret
    util.check_secret(api_client, namespace, "user-gcp-sa")

    cred = GoogleCredentials.get_application_default()
    # Create the Cloud IAM service object
    service = googleapiclient.discovery.build('iam', 'v1', credentials=cred)

    user_sa = 'projects/%s/serviceAccounts/%s-user@%s.iam.gserviceaccount.com' % (
        project, app_name, project)
    admin_sa = 'serviceAccount:%s-admin@%s.iam.gserviceaccount.com' % (
        app_name, project)

    request = service.projects().serviceAccounts().getIamPolicy(
        resource=user_sa)
    response = request.execute()

    # A policy without any bindings may omit the 'bindings' key entirely, and
    # the same role can appear in more than one binding; merge the members so
    # neither case hides a role or a member.
    role_to_members = {}
    for binding in response.get('bindings', []):
        role_to_members.setdefault(binding['role'], set()).update(
            binding['members'])

    if 'roles/owner' not in role_to_members:
        raise Exception("roles/owner missing in iam-policy of %s" % user_sa)

    if admin_sa not in role_to_members['roles/owner']:
        # '%v' is not a valid Python format character; use '%s' so this
        # message is produced instead of a ValueError.
        raise Exception("Admin %s should be owner of user %s" %
                        (admin_sa, user_sa))

    workload_identity_role = 'roles/iam.workloadIdentityUser'
    if workload_identity_role not in role_to_members:
        raise Exception(
            "roles/iam.workloadIdentityUser missing in iam-policy of %s" %
            user_sa)
def check_deployments_ready(record_xml_attribute, namespace, name, deployments,
                            cluster_name):
    """Wait for each of the named deployments to start.

    Args:
      record_xml_attribute: Value forwarded to util.set_pytest_junit together
        with name.
      namespace: The namespace Kubeflow is deployed to.
      name: Test name forwarded to util.set_pytest_junit.
      deployments: Names of the deployments to wait for in namespace.
      cluster_name: Passed to kfctl_aws_util.aws_auth_load_kubeconfig.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, name)

    kfctl_aws_util.aws_auth_load_kubeconfig(cluster_name)

    k8s_client = deploy_utils.create_k8s_client()

    for current in deployments:
        logging.info("Verifying that deployment %s started...", current)
        util.wait_for_deployment(k8s_client, namespace, current, 10)
def check_deployments_ready(record_xml_attribute, namespace, name, deployments):
    """Wait for each of the named deployments to start.

    Args:
      record_xml_attribute: Value forwarded to util.set_pytest_junit together
        with name.
      namespace: The namespace Kubeflow is deployed to.
      name: Test name forwarded to util.set_pytest_junit.
      deployments: Names of the deployments to wait for in namespace.
    """
    set_logging()
    # TODO(jlewi): Should we do this in the calling function)?
    util.set_pytest_junit(record_xml_attribute, name)

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        activate_cmd = ["gcloud", "auth", "activate-service-account"]
        activate_cmd.append("--key-file=" + key_file)
        util.run(activate_cmd)

    k8s_client = deploy_utils.create_k8s_client()

    util.load_kube_config()

    for current in deployments:
        logging.info("Verifying that deployment %s started...", current)
        util.wait_for_deployment(k8s_client, namespace, current, 10)
def test_kf_is_ready(record_xml_attribute, namespace, use_basic_auth, use_istio,
                     app_path):
    """Test that Kubeflow was successfully deployed.

    Waits, in order, for the core deployments, the ingress-related
    deployments, the stateful sets and the knative deployments.

    Args:
      record_xml_attribute: Value forwarded to util.set_pytest_junit.
      namespace: The namespace Kubeflow is deployed to.
      use_basic_auth: On GCP, selects the basic-auth-login deployment when
        truthy and the iap-enabler deployment otherwise.
      use_istio: When truthy, ingress components are looked up in
        "istio-system"; otherwise in namespace.
      app_path: Passed to get_platform_app_name to obtain the platform.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_kf_is_ready")

    # Need to activate account for scopes.
    if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
        util.run([
            "gcloud", "auth", "activate-service-account",
            "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
        ])

    api_client = deploy_utils.create_k8s_client()

    util.load_kube_config()

    # Verify that components are actually deployed.
    # TODO(jlewi): We need to parameterize this list based on whether
    # we are using IAP or basic auth.
    # TODO(yanniszark): This list is incomplete and missing a lot of components.
    deployment_names = [
        "argo-ui",
        "centraldashboard",
        "jupyter-web-app-deployment",
        "minio",
        "ml-pipeline",
        "ml-pipeline-persistenceagent",
        "ml-pipeline-scheduledworkflow",
        "ml-pipeline-ui",
        "ml-pipeline-viewer-controller-deployment",
        "mysql",
        "notebook-controller-deployment",
        "profiles-deployment",
        "pytorch-operator",
        "tf-job-operator",
        "workflow-controller",
    ]

    stateful_set_names = []

    platform, _ = get_platform_app_name(app_path)

    ingress_related_deployments = [
        "istio-egressgateway",
        "istio-ingressgateway",
        "istio-pilot",
        "istio-policy",
        "istio-sidecar-injector",
        "istio-telemetry",
        "istio-tracing",
        "prometheus",
    ]
    ingress_related_stateful_sets = []

    knative_namespace = "knative-serving"
    knative_related_deployments = [
        "activator",
        "autoscaler",
        "controller",
    ]

    if platform == "gcp":
        deployment_names.extend(["cloud-endpoints-controller"])
        stateful_set_names.extend(["kfserving-controller-manager"])
        if use_basic_auth:
            deployment_names.extend(["basic-auth-login"])
        else:
            ingress_related_deployments.extend(["iap-enabler"])
        # backend-updater is expected with either auth mode.
        ingress_related_stateful_sets.extend(["backend-updater"])
    elif platform == "existing_arrikto":
        deployment_names.extend(["dex"])
        ingress_related_deployments.extend(["authservice"])
        knative_related_deployments = []

    # TODO(jlewi): Might want to parallelize this.
    for deployment_name in deployment_names:
        logging.info("Verifying that deployment %s started...", deployment_name)
        util.wait_for_deployment(api_client, namespace, deployment_name, 10)

    ingress_namespace = "istio-system" if use_istio else namespace
    for deployment_name in ingress_related_deployments:
        logging.info("Verifying that deployment %s started...", deployment_name)
        util.wait_for_deployment(api_client, ingress_namespace, deployment_name,
                                 10)

    all_stateful_sets = [(namespace, name) for name in stateful_set_names]
    all_stateful_sets.extend([(ingress_namespace, name)
                              for name in ingress_related_stateful_sets])

    for ss_namespace, name in all_stateful_sets:
        logging.info("Verifying that stateful set %s.%s started...",
                     ss_namespace, name)
        try:
            util.wait_for_statefulset(api_client, ss_namespace, name)
        except Exception:
            # Collect debug information by running describe. Only ordinary
            # errors are handled so that an interrupt is not delayed by it.
            util.run([
                "kubectl", "-n", ss_namespace, "describe", "statefulsets", name
            ])
            raise

    # TODO(jlewi): We should verify that the ingress is created and healthy.

    for deployment_name in knative_related_deployments:
        logging.info("Verifying that deployment %s started...", deployment_name)
        util.wait_for_deployment(api_client, knative_namespace, deployment_name,
                                 10)
def test_kf_is_ready(record_xml_attribute, namespace, use_basic_auth, app_path,
                     cluster_name):
    """Test that Kubeflow was successfully deployed.

    Waits, in order, for the platform deployments, the istio deployments, the
    stateful sets, the ingress (AWS only), the knative deployments, dex and
    cert-manager.

    Args:
      record_xml_attribute: Value forwarded to util.set_pytest_junit.
      namespace: The namespace Kubeflow is deployed to.
      use_basic_auth: On GCP, selects the basic-auth-login deployment when
        truthy and the iap-enabler deployment otherwise.
      app_path: Passed to get_platform_app_name to obtain the platform.
      cluster_name: Passed to kfctl_aws_util.aws_auth_load_kubeconfig.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_kf_is_ready")

    kfctl_aws_util.aws_auth_load_kubeconfig(cluster_name)

    api_client = deploy_utils.create_k8s_client()

    # Verify that components are actually deployed.
    deployment_names = []

    stateful_set_names = []

    platform, _ = get_platform_app_name(app_path)

    # TODO(PatrickXYS): not sure why istio-galley can't found
    ingress_related_deployments = [
        "cluster-local-gateway",
        "istio-citadel",
        "istio-ingressgateway",
        "istio-pilot",
        "istio-policy",
        "istio-sidecar-injector",
        "istio-telemetry",
        "prometheus",
    ]
    ingress_related_stateful_sets = []

    knative_namespace = "knative-serving"
    knative_related_deployments = [
        "activator",
        "autoscaler",
        "autoscaler-hpa",
        "controller",
        "networking-istio",
        "webhook",
    ]

    if platform == "gcp":
        deployment_names.extend(["cloud-endpoints-controller"])
        stateful_set_names.extend(["kfserving-controller-manager"])
        if use_basic_auth:
            deployment_names.extend(["basic-auth-login"])
        else:
            ingress_related_deployments.extend(["iap-enabler"])
        # backend-updater is expected with either auth mode.
        ingress_related_stateful_sets.extend(["backend-updater"])
    elif platform == "existing_arrikto":
        deployment_names.extend(["dex"])
        ingress_related_deployments.extend(["authservice"])
        knative_related_deployments = []
    elif platform == "aws":
        # TODO(PatrickXYS): Extend List with AWS Deployment
        deployment_names.extend(["alb-ingress-controller"])

    # TODO(jlewi): Might want to parallelize this.
    for deployment_name in deployment_names:
        logging.info("Verifying that deployment %s started...", deployment_name)
        util.wait_for_deployment(api_client, namespace, deployment_name, 10)

    ingress_namespace = "istio-system"
    for deployment_name in ingress_related_deployments:
        logging.info("Verifying that deployment %s started...", deployment_name)
        util.wait_for_deployment(api_client, ingress_namespace, deployment_name,
                                 10)

    all_stateful_sets = [(namespace, name) for name in stateful_set_names]
    all_stateful_sets.extend([(ingress_namespace, name)
                              for name in ingress_related_stateful_sets])

    for ss_namespace, name in all_stateful_sets:
        logging.info("Verifying that stateful set %s.%s started...",
                     ss_namespace, name)
        try:
            util.wait_for_statefulset(api_client, ss_namespace, name)
        except Exception:
            # Collect debug information by running describe. Only ordinary
            # errors are handled so that an interrupt is not delayed by it.
            util.run([
                "kubectl", "-n", ss_namespace, "describe", "statefulsets", name
            ])
            raise

    ingress_names = ["istio-ingress"]
    # Check if Ingress is Ready and Healthy
    if platform in ["aws"]:
        for ingress_name in ingress_names:
            logging.info("Verifying that ingress %s started...", ingress_name)
            util.wait_for_ingress(api_client, ingress_namespace, ingress_name,
                                  10)

    for deployment_name in knative_related_deployments:
        logging.info("Verifying that deployment %s started...", deployment_name)
        util.wait_for_deployment(api_client, knative_namespace, deployment_name,
                                 10)

    # Check if Dex is Ready and Healthy
    dex_deployment_names = ["dex"]
    dex_namespace = "auth"
    for dex_deployment_name in dex_deployment_names:
        logging.info("Verifying that deployment %s started...",
                     dex_deployment_name)
        util.wait_for_deployment(api_client, dex_namespace, dex_deployment_name,
                                 10)

    # Check if Cert-Manager is Ready and Healthy
    cert_manager_deployment_names = [
        "cert-manager",
        "cert-manager-cainjector",
        "cert-manager-webhook",
    ]
    cert_manager_namespace = "cert-manager"
    for cert_manager_deployment_name in cert_manager_deployment_names:
        logging.info("Verifying that deployment %s started...",
                     cert_manager_deployment_name)
        util.wait_for_deployment(api_client, cert_manager_namespace,
                                 cert_manager_deployment_name, 10)