def set_kubeconfig(kubeconfig_path):
    """
    Export environment variable KUBECONFIG for future calls of OC commands
    or other API calls

    Args:
        kubeconfig_path (str): path to kubeconfig file to be exported

    Returns:
        boolean: True if successfully connected to cluster, False otherwise
    """
    # Test cluster access
    log.info("Testing access to cluster with %s", kubeconfig_path)
    if not os.path.isfile(kubeconfig_path):
        log.warning("The kubeconfig file %s doesn't exist!", kubeconfig_path)
        return False
    os.environ['KUBECONFIG'] = kubeconfig_path
    try:
        run_cmd("oc cluster-info")
    except CommandFailed as ex:
        log.error("Cluster is not ready to use: %s", ex)
        return False
    log.info("Access to cluster is OK!")
    return True
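
# Hypothetical usage sketch (not part of the module above): guarding a run on
# cluster access. The "auth/kubeconfig" location is an assumption standing in
# for default.KUBECONFIG_LOCATION; the cluster path is an example value.
cluster_path = "/tmp/my-cluster"
kubeconfig = os.path.join(cluster_path, "auth", "kubeconfig")
if not set_kubeconfig(kubeconfig):
    raise RuntimeError("Cluster is not reachable with the given kubeconfig")
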
def create_oc_resource(
    template_name,
    cluster_path,
    _templating,
    template_data={},
    template_dir="ocs-deployment",
):
    """
    Create an oc resource after rendering the specified template with
    the rook data from cluster_conf.

    Args:
        template_name (str): Name of the ocs-deployment config template
        cluster_path (str): Path to cluster directory, where files will be
            written
        _templating (Templating): Object of Templating class used for
            templating
        template_data (dict): Data used to render the template (default: {})
        template_dir (str): Directory under templates dir where template
            exists (default: ocs-deployment)
    """
    template_path = os.path.join(template_dir, template_name)
    template = _templating.render_template(template_path, template_data)
    cfg_file = os.path.join(cluster_path, template_name)
    with open(cfg_file, "w") as f:
        f.write(template)
    log.info(f"Creating rook resource from {template_name}")
    run_cmd(f"oc create -f {cfg_file}")
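
# Hypothetical usage sketch for the helper above: render one template and
# create it in the cluster. The template name, cluster directory, and
# template_data values are illustrative only.
_templating = templating.Templating()
create_oc_resource(
    'common.yaml',                       # template under templates/ocs-deployment
    '/tmp/my-cluster',                   # assumed cluster directory
    _templating,
    template_data={'cluster_namespace': 'openshift-storage'},
)
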
def ceph_health_check():
    """
    Exec `ceph health` cmd on tools pod to determine health of cluster.

    Raises:
        CephHealthException: If the ceph health returned is not HEALTH_OK
        CommandFailed: If the command to retrieve the tools pod name or the
            command to get ceph health returns a non-zero exit code

    Returns:
        0 if HEALTH_OK
    """
    # TODO: grab namespace-name from rook data, default to openshift-storage
    namespace = "openshift-storage"
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-tools "
        f"-n {namespace} "
        f"--timeout=120s"
    )
    tools_pod = run_cmd(
        f"oc -n {namespace} get pod -l 'app=rook-ceph-tools' "
        f"-o jsonpath='{{.items[0].metadata.name}}'"
    )
    health = run_cmd(f"oc -n {namespace} exec {tools_pod} ceph health")
    if health.strip() == "HEALTH_OK":
        log.info("HEALTH_OK, install successful.")
        return 0
    else:
        raise CephHealthException(
            f"Ceph cluster health is not OK. Health: {health}"
        )
def ceph_health_check(namespace=default.ROOK_CLUSTER_NAMESPACE):
    """
    Exec `ceph health` cmd on tools pod to determine health of cluster.

    Args:
        namespace (str): Namespace of OCS
            (default: default.ROOK_CLUSTER_NAMESPACE)

    Raises:
        CephHealthException: If the ceph health returned is not HEALTH_OK
        CommandFailed: If the command to retrieve the tools pod name or the
            command to get ceph health returns a non-zero exit code

    Returns:
        0 if HEALTH_OK
    """
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-tools "
        f"-n {namespace} "
        f"--timeout=120s"
    )
    tools_pod = run_cmd(
        f"oc -n {namespace} get pod -l 'app=rook-ceph-tools' "
        f"-o jsonpath='{{.items[0].metadata.name}}'"
    )
    health = run_cmd(f"oc -n {namespace} exec {tools_pod} ceph health")
    if health.strip() == "HEALTH_OK":
        log.info("HEALTH_OK, install successful.")
        return 0
    else:
        raise CephHealthException(
            f"Ceph cluster health is not OK. Health: {health}"
        )
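
# Hypothetical usage sketch: calling the health check at the end of a
# deployment and logging the failure before re-raising. CephHealthException
# comes from the code above; the namespace value is an example.
try:
    ceph_health_check(namespace="openshift-storage")
except CephHealthException as ex:
    log.error("Deployment finished but Ceph is not healthy: %s", ex)
    raise
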
def exec_oc_cmd(self, command):
    """
    Execute an 'oc' command

    Args:
        command (str): The command to execute (e.g. create -f file.yaml)
            without the initial 'oc' at the beginning

    Returns:
        dict: Dictionary representing the returned yaml output
    """
    oc_cmd = "oc "
    kubeconfig = os.getenv('KUBECONFIG')
    if self.namespace:
        oc_cmd += f"-n {self.namespace} "
    if kubeconfig:
        oc_cmd += f"--kubeconfig {kubeconfig} "
    oc_cmd += command
    out = run_cmd(cmd=oc_cmd)
    try:
        if out.startswith('hints = '):
            out = out[out.index('{'):]
    except ValueError:
        pass
    return yaml.safe_load(out)
def new_project(self, project_name):
    """
    Creates a new project

    Args:
        project_name (str): Name of the project to be created

    Returns:
        bool: True in case project creation succeeded, False otherwise
    """
    command = f"oc new-project {project_name}"
    if f'Now using project "{project_name}"' in run_cmd(f"{command}"):
        return True
    return False
def switch_to_project(project_name):
    """
    Switch to another project

    Args:
        project_name (str): Name of the project to be switched to

    Returns:
        bool: True on success, False otherwise
    """
    log.info(f'Switching to project {project_name}')
    cmd = f'oc project {project_name}'
    success_msgs = [
        f'Now using project "{project_name}"',
        f'Already on project "{project_name}"'
    ]
    ret = run_cmd(cmd)
    if any(msg in ret for msg in success_msgs):
        return True
    return False
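
# Hypothetical usage sketch: selecting the storage namespace before running
# project-scoped `oc` commands; the project name is an example only.
if not switch_to_project("openshift-storage"):
    raise RuntimeError("Could not switch to project openshift-storage")
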
def exec_oc_cmd(self, command):
    """
    Execute an 'oc' command

    Args:
        command (str): The command to execute (e.g. create -f file.yaml)
            without the initial 'oc' at the beginning

    Returns:
        Munch obj: Object representing the returned yaml output
    """
    oc_cmd = "oc "
    kubeconfig = os.getenv('KUBECONFIG')
    if self.namespace:
        oc_cmd += f"-n {self.namespace} "
    if kubeconfig:
        oc_cmd += f"--kubeconfig {kubeconfig} "
    oc_cmd += command
    out = run_cmd(cmd=oc_cmd)
    return munchify(yaml.safe_load(out))
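
# Hypothetical standalone sketch of the same pattern used above: run an
# `oc ... -o yaml` command and turn the output into attribute-style objects
# with munchify. run_cmd() and log are assumed from this code base; the
# command and namespace are examples only.
from munch import munchify
import yaml

out = run_cmd("oc -n openshift-storage get pods -o yaml")
pods = munchify(yaml.safe_load(out))
for pod in pods['items']:
    # nested fields of each item can be read as attributes
    log.info("pod %s is in phase %s", pod.metadata.name, pod.status.phase)
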
def run(**kwargs):
    log.info("Running OCS basic installation")
    test_data = kwargs.get('test_data')
    cluster_path = test_data.get('cluster-path')
    # Test cluster access; if the cluster is already accessible, skip the
    # deployment.
    if cluster_path and OCP.set_kubeconfig(
        os.path.join(cluster_path, default.KUBECONFIG_LOCATION)
    ):
        return TestStatus.SKIPPED

    config = kwargs.get('config')
    cluster_conf = kwargs.get('cluster_conf')
    workers = masters = aws_region = None
    if cluster_conf:
        cluster_details = cluster_conf.get('aws', {}).get('cluster', {})
        workers = cluster_details.get('workers')
        masters = cluster_details.get('masters')
        aws_region = cluster_details.get('region', default.AWS_REGION)

    # Generate install-config from template
    log.info("Generating install-config")
    # TODO: determine better place to create cluster directories - (log dir?)
    cluster_dir_parent = "/tmp"
    cluster_name = test_data.get('cluster-name')
    base_cluster_name = test_data.get('cluster-name', default.CLUSTER_NAME)
    cid = random.randint(10000, 99999)
    if not (cluster_name and cluster_path):
        cluster_name = f"{base_cluster_name}-{cid}"
    if not cluster_path:
        cluster_path = os.path.join(cluster_dir_parent, cluster_name)
    run_cmd(f"mkdir -p {cluster_path}")

    pull_secret_path = os.path.join(templating.TOP_DIR, "data", "pull-secret")
    with open(pull_secret_path, "r") as f:
        pull_secret = f.readline()

    data = {
        "cluster_name": cluster_name,
        "pull_secret": pull_secret,
    }
    if workers:
        data.update({'worker_replicas': workers})
    if masters:
        data.update({'master_replicas': masters})
    if aws_region:
        data.update({'region': aws_region})
    _templating = templating.Templating()
    template = _templating.render_template("install-config.yaml.j2", data)
    log.info(f"Install config: \n{template}")
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(template)

    # Download installer
    installer_filename = "openshift-install"
    tarball = f"{installer_filename}.tar.gz"
    if os.path.isfile(installer_filename):
        log.info("Installer exists, skipping download")
    else:
        log.info("Downloading openshift installer")
        ver = config.get('installer-version', default.INSTALLER_VERSION)
        if platform.system() == "Darwin":
            os_type = "mac"
        elif platform.system() == "Linux":
            os_type = "linux"
        else:
            raise UnsupportedOSType
        url = (
            f"https://mirror.openshift.com/pub/openshift-v4/clients/ocp/"
            f"{ver}/openshift-install-{os_type}-{ver}.tar.gz"
        )
        download_file(url, tarball)
        run_cmd(f"tar xzvf {tarball}")

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(
        f"./openshift-install create cluster "
        f"--dir {cluster_path} "
        f"--log-level debug"
    )

    # Test cluster access
    if not OCP.set_kubeconfig(
        os.path.join(cluster_path, default.KUBECONFIG_LOCATION)
    ):
        return TestStatus.FAILED

    # TODO: Create cluster object, add to test_data for other tests to utilize
    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    region_name = aws_region if aws_region else default.AWS_REGION
    create_ebs_volumes(worker_pattern, region_name=region_name)

    # Use Rook to install Ceph cluster
    # retrieve rook config from cluster_conf
    rook_data = {}
    if cluster_conf:
        rook_data = cluster_conf.get('rook', {})

    # render templates and create resources
    create_oc_resource('common.yaml', rook_data, cluster_path, _templating)
    run_cmd(
        'oc label namespace openshift-storage '
        '"openshift.io/cluster-monitoring=true"'
    )
    run_cmd(
        "oc policy add-role-to-user view "
        "system:serviceaccount:openshift-monitoring:prometheus-k8s "
        "-n openshift-storage"
    )
    create_oc_resource(
        'operator-openshift.yaml', rook_data, cluster_path, _templating
    )
    wait_time = 5
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(
        "oc wait --for condition=ready pod "
        "-l app=rook-ceph-operator "
        "-n openshift-storage "
        "--timeout=120s"
    )
    run_cmd(
        "oc wait --for condition=ready pod "
        "-l app=rook-ceph-agent "
        "-n openshift-storage "
        "--timeout=120s"
    )
    run_cmd(
        "oc wait --for condition=ready pod "
        "-l app=rook-discover "
        "-n openshift-storage "
        "--timeout=120s"
    )
    create_oc_resource('cluster.yaml', rook_data, cluster_path, _templating)
    create_oc_resource('toolbox.yaml', rook_data, cluster_path, _templating)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'storage-manifest.yaml', rook_data, cluster_path, _templating
    )
    create_oc_resource(
        "service-monitor.yaml", rook_data, cluster_path, _templating
    )
    create_oc_resource(
        "prometheus-rules.yaml", rook_data, cluster_path, _templating
    )

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    rc = ceph_health_check()

    # Destroy cluster (if configured)
    destroy_cmd = (
        f"./openshift-install destroy cluster "
        f"--dir {cluster_path} "
        f"--log-level debug"
    )
    if config.get("destroy-cluster"):
        log.info("Destroying cluster")
        run_cmd(destroy_cmd)
        # TODO: destroy volumes created
        os.remove(installer_filename)
        os.remove(tarball)
    else:
        log.info(f"Cluster directory is located here: {cluster_path}")
        log.info(
            f"Skipping cluster destroy. "
            f"To manually destroy the cluster execute the following cmd:\n"
            f"{destroy_cmd}"
        )

    return rc
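
# Hypothetical example of the cluster_conf structure that the run() variant
# above reads: aws.cluster.{workers,masters,region} plus an optional rook
# section passed to the rook templates. All values are illustrative only.
cluster_conf = {
    'aws': {
        'cluster': {
            'workers': 3,
            'masters': 3,
            'region': 'us-east-2',
        },
    },
    'rook': {},
}
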
def run(**kwargs):
    log.info("Running OCS basic installation")
    test_data = kwargs.get('test_data')
    cluster_path = test_data.get('cluster-path')
    # Test cluster access; if the cluster is already accessible, skip the
    # deployment.
    if cluster_path and OCP.set_kubeconfig(
        os.path.join(cluster_path, default.KUBECONFIG_LOCATION)
    ):
        return TestStatus.SKIPPED

    config = kwargs.get('config')
    cluster_conf = kwargs.get('cluster_conf', {})
    env_data = deepcopy(default.ENV_DATA)
    custom_env_data = cluster_conf.get('env_data', {})

    # Generate install-config from template
    log.info("Generating install-config")
    # TODO: determine better place to create cluster directories - (log dir?)
    cluster_dir_parent = "/tmp"
    cluster_name = test_data.get('cluster-name')
    base_cluster_name = test_data.get('cluster-name', default.CLUSTER_NAME)
    cid = random.randint(10000, 99999)
    if not (cluster_name and cluster_path):
        cluster_name = f"{base_cluster_name}-{cid}"
    if not cluster_path:
        cluster_path = os.path.join(cluster_dir_parent, cluster_name)
    run_cmd(f"mkdir -p {cluster_path}")

    pull_secret_path = os.path.join(templating.TOP_DIR, "data", "pull-secret")
    with open(pull_secret_path, "r") as f:
        pull_secret = f.readline()

    custom_env_data.update({
        'pull_secret': pull_secret,
        'cluster_name': cluster_name,
    })
    if custom_env_data:
        env_data.update(custom_env_data)

    # TODO: check for supported platform and raise the exception if not
    # supported. Currently we support just AWS.
    _templating = templating.Templating()
    template = _templating.render_template("install-config.yaml.j2", env_data)
    log.info(f"Install config: \n{template}")
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(template)

    # Download installer
    version = config.get('installer-version', default.INSTALLER_VERSION)
    installer = download_openshift_installer(version)

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(
        f"./{installer} create cluster "
        f"--dir {cluster_path} "
        f"--log-level debug"
    )

    # Test cluster access
    if not OCP.set_kubeconfig(
        os.path.join(cluster_path, default.KUBECONFIG_LOCATION)
    ):
        return TestStatus.FAILED

    # TODO: Create cluster object, add to test_data for other tests to utilize
    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    create_ebs_volumes(worker_pattern, region_name=env_data['region'])

    # render templates and create resources
    create_oc_resource('common.yaml', cluster_path, _templating, env_data)
    run_cmd(
        f'oc label namespace {env_data["cluster_namespace"]} '
        f'"openshift.io/cluster-monitoring=true"'
    )
    run_cmd(
        f"oc policy add-role-to-user view "
        f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
        f"-n {env_data['cluster_namespace']}"
    )
    create_oc_resource(
        'operator-openshift.yaml', cluster_path, _templating, env_data
    )

    # Increased to 10 seconds as 5 is not enough
    # TODO: use the sampler function and check if the resource exists
    wait_time = 10
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-operator "
        f"-n {env_data['cluster_namespace']} "
        f"--timeout=120s"
    )
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-agent "
        f"-n {env_data['cluster_namespace']} "
        f"--timeout=120s"
    )
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-discover "
        f"-n {env_data['cluster_namespace']} "
        f"--timeout=120s"
    )
    create_oc_resource('cluster.yaml', cluster_path, _templating, env_data)
    create_oc_resource('toolbox.yaml', cluster_path, _templating, env_data)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'storage-manifest.yaml', cluster_path, _templating, env_data
    )
    create_oc_resource(
        "service-monitor.yaml", cluster_path, _templating, env_data
    )
    create_oc_resource(
        "prometheus-rules.yaml", cluster_path, _templating, env_data
    )

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    rc = ceph_health_check(namespace=env_data['cluster_namespace'])
    return rc
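
# Hypothetical example of the env_data overrides consumed by the run()
# variant above: anything under cluster_conf['env_data'] is merged over
# default.ENV_DATA before rendering. Keys shown are ones the code reads
# (region, cluster_namespace); values are illustrative only.
cluster_conf = {
    'env_data': {
        'region': 'us-east-2',
        'cluster_namespace': 'openshift-storage',
    },
}
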
def test_deployment(self):
    log.info("Running OCS basic installation")
    cluster_path = ENV_DATA['cluster_path']
    # Test cluster access; if the cluster is already accessible, skip the
    # deployment.
    if RUN['cli_params'].get('cluster_path') and OCP.set_kubeconfig(
        os.path.join(cluster_path, RUN.get('kubeconfig_location'))
    ):
        pytest.skip(
            "The installation is skipped because the cluster is running"
        )

    # Generate install-config from template
    log.info("Generating install-config")
    run_cmd(f"mkdir -p {cluster_path}")
    pull_secret_path = os.path.join(
        TOP_DIR, "data", "pull-secret"
    )

    # TODO: check for supported platform and raise the exception if not
    # supported. Currently we support just AWS.
    _templating = templating.Templating()
    install_config_str = _templating.render_template(
        "install-config.yaml.j2", ENV_DATA
    )
    # Parse the rendered YAML so that we can manipulate the object directly
    install_config_obj = yaml.safe_load(install_config_str)
    with open(pull_secret_path, "r") as f:
        # Parse, then unparse, the JSON file.
        # We do this for two reasons: to ensure it is well-formatted, and
        # also to ensure it ends up as a single line.
        install_config_obj['pullSecret'] = json.dumps(json.loads(f.read()))
    install_config_str = yaml.safe_dump(install_config_obj)
    log.info(f"Install config: \n{install_config_str}")
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(install_config_str)

    # Download installer
    installer = get_openshift_installer(
        DEPLOYMENT['installer_version']
    )
    # Download client
    get_openshift_client()

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(
        f"{installer} create cluster "
        f"--dir {cluster_path} "
        f"--log-level debug"
    )

    # Test cluster access
    if not OCP.set_kubeconfig(
        os.path.join(cluster_path, RUN.get('kubeconfig_location'))
    ):
        pytest.fail("Cluster is not available!")

    # TODO: Create cluster object, add to ENV_DATA for other tests to
    # utilize.
    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    create_ebs_volumes(worker_pattern, region_name=ENV_DATA['region'])

    # render templates and create resources
    create_oc_resource('common.yaml', cluster_path, _templating, ENV_DATA)
    run_cmd(
        f'oc label namespace {ENV_DATA["cluster_namespace"]} '
        f'"openshift.io/cluster-monitoring=true"'
    )
    run_cmd(
        f"oc policy add-role-to-user view "
        f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
        f"-n {ENV_DATA['cluster_namespace']}"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_rbd.yaml',
        cluster_path,
        _templating,
        ENV_DATA,
        template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_rbd.yaml',
        cluster_path,
        _templating,
        ENV_DATA,
        template_dir="ocs-deployment/csi/rbd/"
    )
    apply_oc_resource(
        'csi-nodeplugin-rbac_cephfs.yaml',
        cluster_path,
        _templating,
        ENV_DATA,
        template_dir="ocs-deployment/csi/cephfs/"
    )
    apply_oc_resource(
        'csi-provisioner-rbac_cephfs.yaml',
        cluster_path,
        _templating,
        ENV_DATA,
        template_dir="ocs-deployment/csi/cephfs/"
    )
    # Increased to 15 seconds as 10 is not enough
    # TODO: use the sampler function and check if the resource exists
    wait_time = 15
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'operator-openshift-with-csi.yaml', cluster_path, _templating,
        ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-operator "
        f"-n {ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-ceph-agent "
        f"-n {ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    run_cmd(
        f"oc wait --for condition=ready pod "
        f"-l app=rook-discover "
        f"-n {ENV_DATA['cluster_namespace']} "
        f"--timeout=120s"
    )
    create_oc_resource('cluster.yaml', cluster_path, _templating, ENV_DATA)
    create_oc_resource('toolbox.yaml', cluster_path, _templating, ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource(
        'storage-manifest.yaml', cluster_path, _templating, ENV_DATA
    )
    create_oc_resource(
        "service-monitor.yaml", cluster_path, _templating, ENV_DATA
    )
    create_oc_resource(
        "prometheus-rules.yaml", cluster_path, _templating, ENV_DATA
    )
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=ENV_DATA['cluster_namespace'])