def __init__(self):
    self.name = self.__class__.__name__
    super(AWSIPI, self).__init__()
    # Only force a fresh installer download when we are actually deploying
    force_download = (
        config.RUN['cli_params'].get('deploy')
        and config.DEPLOYMENT['force_download_installer']
    )
    self.installer = utils.get_openshift_installer(
        config.DEPLOYMENT['installer_version'],
        force_download=force_download
    )
def download_installer(self):
    """
    Method to download installer

    Returns:
        str: path to the installer
    """
    force_download = (
        config.RUN['cli_params'].get('deploy')
        and config.DEPLOYMENT['force_download_installer']
    )
    return utils.get_openshift_installer(
        config.DEPLOYMENT['installer_version'],
        force_download=force_download
    )
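# The AWSIPI __init__ shown earlier inlines the same force_download logic that
# download_installer now encapsulates. A minimal sketch of how the constructor
# could delegate to this helper instead (illustrative refactor, not the repo's
# actual code):
def __init__(self):
    self.name = self.__class__.__name__
    super(AWSIPI, self).__init__()
    self.installer = self.download_installer()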
def destroy_cluster(self, log_level="DEBUG"):
    """
    Destroy OCP cluster specific to vSphere IPI

    Args:
        log_level (str): log level openshift-installer (default: DEBUG)
    """
    force_download = config.DEPLOYMENT["force_download_installer"]
    installer = get_openshift_installer(
        config.DEPLOYMENT["installer_version"], force_download=force_download
    )
    try:
        run_cmd(
            f"{installer} destroy cluster "
            f"--dir {self.cluster_path} "
            f"--log-level {log_level}",
            timeout=3600,
        )
    except CommandFailed as e:
        logger.error(e)
def aws_cleanup():
    parser = argparse.ArgumentParser(
        description='AWS overall resources cleanup according to running time'
    )
    parser.add_argument(
        '--hours',
        type=hour_valid,
        action='store',
        required=True,
        help="""
            Maximum running time of the cluster (in hours).
            Clusters older than this will be deleted.
            The minimum is 10 hours
        """
    )
    parser.add_argument(
        '--region',
        action='store',
        required=False,
        help="The name of the AWS region to delete the resources from"
    )
    parser.add_argument(
        '--prefix',
        action='append',
        required=False,
        type=prefix_hour_mapping,
        help="""
            Additional prefix:hour combo to treat as a special rule.
            Clusters starting with this prefix will only be cleaned up
            if their runtime exceeds the provided hour (this takes precedence
            over the value provided to --hours). Note: if you want to skip
            cleanup of a cluster entirely you can use 'never' for the hour.
            Example: --prefix foo:24 --prefix bar:48 --prefix foobar:never
        """
    )
    parser.add_argument(
        '--force',
        action='store_true',
        required=False,
        help="""
            Force cluster cleanup.
            User will not be prompted for confirmation.
            WARNING: this utility is destructive, only use this option if
            you know what you are doing.
        """
    )
    args = parser.parse_args()

    if not args.force:
        confirmation = input(
            'Careful! This action could be highly destructive. '
            'Are you sure you want to proceed? '
        )
        assert confirmation == defaults.CONFIRMATION_ANSWER, (
            "Wrong confirmation answer. Exiting"
        )

    prefixes_hours_to_spare = defaults.CLUSTER_PREFIXES_SPECIAL_RULES
    if args.prefix:
        for prefix, hours in args.prefix:
            logger.info(
                "Adding special rule for prefix '%s' with hours %s",
                prefix, hours
            )
            prefixes_hours_to_spare.update({prefix: hours})

    time_to_delete = args.hours * 60 * 60
    region = defaults.AWS_REGION if not args.region else args.region
    clusters_to_delete, cloudformation_vpcs = get_clusters_to_delete(
        time_to_delete=time_to_delete,
        region_name=region,
        prefixes_hours_to_spare=prefixes_hours_to_spare,
    )

    if not clusters_to_delete:
        logger.info("No clusters to delete")
    else:
        logger.info("Deleting clusters: %s", clusters_to_delete)
        get_openshift_installer()
    procs = []
    for cluster in clusters_to_delete:
        cluster_name = cluster.rsplit('-', 1)[0]
        logger.info(f"Deleting cluster {cluster_name}")
        proc = threading.Thread(target=cleanup, args=(cluster_name, cluster))
        proc.start()
        procs.append(proc)
    for p in procs:
        p.join()
    if cloudformation_vpcs:
        logger.warning(
            "The following cloudformation VPCs were found: %s",
            cloudformation_vpcs
        )
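# The --hours and --prefix options above rely on the hour_valid and
# prefix_hour_mapping validators, which are not shown in this section. A rough
# sketch of what such argparse type= callables could look like, based only on
# the behavior described in the help text (the 10-hour minimum, prefix:hour
# pairs, and 'never' to skip cleanup); names and details here are assumptions:
import argparse

MINIMUM_HOURS = 10  # assumed constant; the real value lives in defaults


def hour_valid(value):
    """argparse type= callable: reject runtimes below the minimum."""
    hours = int(value)
    if hours < MINIMUM_HOURS:
        raise argparse.ArgumentTypeError(
            f"--hours must be at least {MINIMUM_HOURS}, got {hours}"
        )
    return hours


def prefix_hour_mapping(value):
    """argparse type= callable: parse 'prefix:hours' (or 'prefix:never')."""
    try:
        prefix, hours = value.split(':', 1)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected prefix:hours, got '{value}'"
        )
    if hours != 'never':
        hours = int(hours)
    return prefix, hours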
def cluster(request):
    """
    Deploy an OCP cluster for testing, or reuse a running one, based on the
    --deploy and --teardown CLI parameters. Registers the teardown finalizer
    when --teardown is provided.
    """
    log.info(f"All logs located at {log_path}")
    log.info("Running OCS basic installation")
    cluster_path = config.ENV_DATA['cluster_path']
    deploy = config.RUN['cli_params']['deploy']
    teardown = config.RUN['cli_params']['teardown']
    # Add a finalizer to teardown the cluster after test execution is finished
    if teardown:
        request.addfinalizer(cluster_teardown)
        log.info("Will teardown cluster because --teardown was provided")
    # Test cluster access and if exist just skip the deployment.
    if is_cluster_running(cluster_path):
        log.info("The installation is skipped because the cluster is running")
        return
    elif teardown and not deploy:
        log.info("Attempting teardown of non-accessible cluster: %s", cluster_path)
        return
    elif not deploy and not teardown:
        msg = "The given cluster can not be connected to: {}. ".format(cluster_path)
        msg += "Provide a valid --cluster-path or use --deploy to deploy a new cluster"
        pytest.fail(msg)
    elif not system.is_path_empty(cluster_path) and deploy:
        msg = "The given cluster path is not empty: {}. ".format(cluster_path)
        msg += "Provide an empty --cluster-path and --deploy to deploy a new cluster"
        pytest.fail(msg)
    else:
        log.info(
            "A testing cluster will be deployed and cluster information stored at: %s",
            cluster_path
        )

    # Generate install-config from template
    log.info("Generating install-config")
    pull_secret_path = os.path.join(constants.TOP_DIR, "data", "pull-secret")

    # TODO: check for supported platform and raise the exception if not
    # supported. Currently we support just AWS.
    _templating = templating.Templating()
    install_config_str = _templating.render_template(
        "install-config.yaml.j2", config.ENV_DATA
    )
    # Log the install config *before* adding the pull secret, so we don't leak
    # sensitive data.
    log.info(f"Install config: \n{install_config_str}")
    # Parse the rendered YAML so that we can manipulate the object directly
    install_config_obj = yaml.safe_load(install_config_str)
    with open(pull_secret_path, "r") as f:
        # Parse, then unparse, the JSON file.
        # We do this for two reasons: to ensure it is well-formatted, and
        # also to ensure it ends up as a single line.
        install_config_obj['pullSecret'] = json.dumps(json.loads(f.read()))
    install_config_str = yaml.safe_dump(install_config_obj)
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(install_config_str)

    # Download installer
    installer = get_openshift_installer(config.DEPLOYMENT['installer_version'])
    # Download client
    get_openshift_client()

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(
        f"{installer} create cluster "
        f"--dir {cluster_path} "
        f"--log-level debug"
    )
    # Test cluster access
    if not OCP.set_kubeconfig(
        os.path.join(cluster_path, config.RUN.get('kubeconfig_location'))
    ):
        pytest.fail("Cluster is not available!")

    # TODO: Create cluster object, add to config.ENV_DATA for other tests to
    # utilize.
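# is_cluster_running() decides above whether deployment can be skipped; its
# implementation is outside this section. One plausible sketch, assuming it
# simply checks that the cluster path holds a kubeconfig that `oc` can reach
# (the helper name comes from the fixture, the body is an assumption, not the
# framework's actual code):
import os
import subprocess


def is_cluster_running(cluster_path):
    """Return True if a kubeconfig under cluster_path reaches a live cluster."""
    # Assumed default installer location for the kubeconfig
    kubeconfig = os.path.join(cluster_path, "auth", "kubeconfig")
    if not os.path.isfile(kubeconfig):
        return False
    result = subprocess.run(
        ["oc", "--kubeconfig", kubeconfig, "cluster-info"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return result.returncode == 0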
# Determine worker pattern and create ebs volumes
with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
    tfvars = json.load(f)

cluster_id = tfvars['cluster_id']
worker_pattern = f'{cluster_id}-worker*'
log.info(f'Worker pattern: {worker_pattern}')
create_ebs_volumes(worker_pattern, region_name=config.ENV_DATA['region'])

# render templates and create resources
create_oc_resource('common.yaml', cluster_path, _templating, config.ENV_DATA)
run_cmd(
    f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
    f'"openshift.io/cluster-monitoring=true"'
)
run_cmd(
    f"oc policy add-role-to-user view "
    f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
    f"-n {config.ENV_DATA['cluster_namespace']}"
)
apply_oc_resource(
    'csi-nodeplugin-rbac_rbd.yaml', cluster_path, _templating,
    config.ENV_DATA, template_dir="ocs-deployment/csi/rbd/"
)
apply_oc_resource(
    'csi-provisioner-rbac_rbd.yaml', cluster_path, _templating,
    config.ENV_DATA, template_dir="ocs-deployment/csi/rbd/"
)
apply_oc_resource(
    'csi-nodeplugin-rbac_cephfs.yaml', cluster_path, _templating,
    config.ENV_DATA, template_dir="ocs-deployment/csi/cephfs/"
)
apply_oc_resource(
    'csi-provisioner-rbac_cephfs.yaml', cluster_path, _templating,
    config.ENV_DATA, template_dir="ocs-deployment/csi/cephfs/"
)
# Increased to 15 seconds as 10 is not enough
# TODO: do the sampler function and check if resource exist
wait_time = 15
log.info(f"Waiting {wait_time} seconds...")
time.sleep(wait_time)
create_oc_resource(
    'operator-openshift-with-csi.yaml', cluster_path, _templating,
    config.ENV_DATA
)
log.info(f"Waiting {wait_time} seconds...")
time.sleep(wait_time)
run_cmd(
    f"oc wait --for condition=ready pod "
    f"-l app=rook-ceph-operator "
    f"-n {config.ENV_DATA['cluster_namespace']} "
    f"--timeout=120s"
)
run_cmd(
    f"oc wait --for condition=ready pod "
    f"-l app=rook-discover "
    f"-n {config.ENV_DATA['cluster_namespace']} "
    f"--timeout=120s"
)
create_oc_resource('cluster.yaml', cluster_path, _templating, config.ENV_DATA)

POD = ocp.OCP(
    kind=constants.POD, namespace=config.ENV_DATA['cluster_namespace']
)
CFS = ocp.OCP(
    kind=constants.CEPHFILESYSTEM,
    namespace=config.ENV_DATA['cluster_namespace']
)

# Check for the Running status of Ceph Pods
run_cmd(
    f"oc wait --for condition=ready pod "
    f"-l app=rook-ceph-agent "
    f"-n {config.ENV_DATA['cluster_namespace']} "
    f"--timeout=120s"
)
assert POD.wait_for_resource(
    condition='Running', selector='app=rook-ceph-mon',
    resource_count=3, timeout=600
)
assert POD.wait_for_resource(
    condition='Running', selector='app=rook-ceph-mgr', timeout=600
)
assert POD.wait_for_resource(
    condition='Running', selector='app=rook-ceph-osd',
    resource_count=3, timeout=600
)
create_oc_resource('toolbox.yaml', cluster_path, _templating, config.ENV_DATA)
log.info(f"Waiting {wait_time} seconds...")
time.sleep(wait_time)
create_oc_resource(
    'storage-manifest.yaml', cluster_path, _templating, config.ENV_DATA
)
create_oc_resource(
    "service-monitor.yaml", cluster_path, _templating, config.ENV_DATA
)
create_oc_resource(
    "prometheus-rules.yaml", cluster_path, _templating, config.ENV_DATA
)
log.info(f"Waiting {wait_time} seconds...")
time.sleep(wait_time)

# Create MDS pods for CephFileSystem
fs_data = templating.load_yaml_to_dict(constants.CEPHFILESYSTEM_YAML)
fs_data['metadata']['namespace'] = config.ENV_DATA['cluster_namespace']
ceph_obj = OCS(**fs_data)
ceph_obj.create()
assert POD.wait_for_resource(
    condition=constants.STATUS_RUNNING, selector='app=rook-ceph-mds',
    resource_count=2, timeout=600
)

# Check for CephFilesystem creation in ocp
cfs_data = CFS.get()
cfs_name = cfs_data['items'][0]['metadata']['name']
if helpers.validate_cephfilesystem(cfs_name):
    log.info("MDS deployment is successful!")
    defaults.CEPHFILESYSTEM_NAME = cfs_name
else:
    log.error("MDS deployment Failed! Please check logs!")

# Verify health of ceph cluster
# TODO: move destroy cluster logic to new CLI usage pattern?
log.info("Done creating rook resources, waiting for HEALTH_OK")
assert ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
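# The fixed time.sleep(wait_time) calls above are flagged by the TODO about a
# sampler function. A hedged sketch of a poll-until-ready helper that could
# replace them (name, signature and the example call are illustrative, not the
# framework's actual sampler utilities):
import time


def wait_for(check, timeout=120, interval=5):
    """Poll check() until it returns a truthy value or timeout elapses."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if check():
            return True
        time.sleep(interval)
    return False


# Possible usage instead of a fixed sleep, e.g. waiting until the namespace
# reports any pods (the exact check would depend on the OCP wrapper API):
# wait_for(lambda: POD.get()['items'], timeout=120)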
def aws_cleanup():
    parser = argparse.ArgumentParser(
        description='AWS overall resources cleanup according to running time'
    )
    parser.add_argument(
        '--hours',
        type=int,
        nargs=1,
        action='append',
        required=True,
        help="Maximum running time of the cluster (in hours). Clusters older "
             "than this will be deleted. The minimum is 10 hours"
    )
    parser.add_argument(
        '--region',
        nargs=1,
        action='append',
        required=False,
        help="The name of the AWS region to delete the resources from"
    )
    parser.add_argument(
        '--force',
        action='store_true',
        required=False,
        help="Force cluster cleanup. "
             "User will not be prompted for confirmation. "
             "WARNING: this utility is destructive, only use this option if "
             "you know what you are doing."
    )
    parser.add_argument(
        '--skip-prefixes',
        action='store_true',
        required=False,
        help="Skip prompt for additional prefixes to spare"
    )
    logging.basicConfig(level=logging.DEBUG)
    args = parser.parse_args()

    if not args.force:
        confirmation = input(
            'Careful! This action could be highly destructive. '
            'Are you sure you want to proceed? '
        )
        assert confirmation == defaults.CONFIRMATION_ANSWER, (
            "Wrong confirmation answer. Exiting"
        )

    prefixes_hours_to_spare = defaults.CLUSTER_PREFIXES_TO_EXCLUDE_FROM_DELETION
    if not args.skip_prefixes:
        prefixes_hours = input(
            "Press Enter if there are no cluster prefixes to spare.\n"
            "If you would like the cleanup to spare specific cluster prefixes, "
            "please enter them along with the time allowed for these to be kept "
            "running, in a dictionary representation.\nAn example: "
            "{'prefix1': 36, 'prefix2': 48} "
        )
        if prefixes_hours:
            prefixes_hours_to_spare = eval(prefixes_hours)

    time_to_delete = args.hours[0][0]
    assert time_to_delete > defaults.MINIMUM_CLUSTER_RUNNING_TIME_FOR_DELETION, (
        "Number of hours is lower than the required minimum. Exiting"
    )
    time_to_delete = time_to_delete * 60 * 60
    region = defaults.AWS_REGION if not args.region else args.region[0][0]
    clusters_to_delete, cloudformation_vpcs = get_clusters_to_delete(
        time_to_delete=time_to_delete,
        region_name=region,
        prefixes_hours_to_spare=prefixes_hours_to_spare,
    )

    if not clusters_to_delete:
        logger.info("No clusters to delete")
    else:
        get_openshift_installer()
    procs = []
    for cluster in clusters_to_delete:
        cluster_name = cluster.rsplit('-', 1)[0]
        logger.info(f"Deleting cluster {cluster_name}")
        proc = threading.Thread(target=cleanup, args=(cluster_name, cluster))
        proc.start()
        procs.append(proc)
    for p in procs:
        p.join()
    if cloudformation_vpcs:
        logger.warning(
            "The following cloudformation VPCs were found: %s",
            cloudformation_vpcs
        )
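# The prompt above passes raw user input to eval(), which executes arbitrary
# expressions. If that ever needs hardening, ast.literal_eval accepts only
# Python literals (the dict the prompt asks for). A small sketch of the swap,
# reusing the prefixes_hours string read by input() above:
import ast

if prefixes_hours:
    try:
        prefixes_hours_to_spare = ast.literal_eval(prefixes_hours)
    except (ValueError, SyntaxError):
        raise SystemExit(
            f"Could not parse prefixes dictionary: {prefixes_hours!r}"
        )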
def aws_cleanup():
    parser = argparse.ArgumentParser(
        description="AWS overall resources cleanup according to running time"
    )
    parser.add_argument(
        "--hours",
        type=hour_valid,
        action="store",
        required=True,
        help="""
            Maximum running time of the cluster (in hours).
            Clusters older than this will be deleted.
            The minimum is 10 hours
            """,
    )
    parser.add_argument(
        "--region",
        action="store",
        required=False,
        help="The name of the AWS region to delete the resources from",
    )
    parser.add_argument(
        "--prefix",
        action="append",
        required=False,
        type=prefix_hour_mapping,
        help="""
            Additional prefix:hour combo to treat as a special rule.
            Clusters starting with this prefix will only be cleaned up
            if their runtime exceeds the provided hour (this takes precedence
            over the value provided to --hours). Note: if you want to skip
            cleanup of a cluster entirely you can use 'never' for the hour.
            Example: --prefix foo:24 --prefix bar:48 --prefix foobar:never
            """,
    )
    parser.add_argument(
        "--force",
        action="store_true",
        required=False,
        help="""
            Force cluster cleanup. User will not be prompted for confirmation.
            WARNING: this utility is destructive, only use this option if
            you know what you are doing.
            """,
    )
    args = parser.parse_args()

    if not args.force:
        confirmation = input(
            "Careful! This action could be highly destructive. "
            "Are you sure you want to proceed? "
        )
        assert (
            confirmation == defaults.CONFIRMATION_ANSWER
        ), "Wrong confirmation answer. Exiting"

    prefixes_hours_to_spare = defaults.CLUSTER_PREFIXES_SPECIAL_RULES
    if args.prefix:
        for prefix, hours in args.prefix:
            logger.info(
                "Adding special rule for prefix '%s' with hours %s", prefix, hours
            )
            prefixes_hours_to_spare.update({prefix: hours})

    time_to_delete = args.hours * 60 * 60
    region = defaults.AWS_REGION if not args.region else args.region
    clusters_to_delete, cf_clusters_to_delete, remaining_clusters = get_clusters(
        time_to_delete=time_to_delete,
        region_name=region,
        prefixes_hours_to_spare=prefixes_hours_to_spare,
    )

    if not clusters_to_delete:
        logger.info("No clusters to delete")
    else:
        logger.info("Deleting clusters: %s", clusters_to_delete)
        get_openshift_installer()
    procs = []
    failed_deletions = []
    for cluster in clusters_to_delete:
        cluster_name = cluster.rsplit("-", 1)[0]
        logger.info(f"Deleting cluster {cluster_name}")
        proc = threading.Thread(
            target=cleanup, args=(cluster_name, cluster, False, failed_deletions)
        )
        proc.start()
        procs.append(proc)
    for p in procs:
        p.join()
    for cluster in cf_clusters_to_delete:
        cluster_name = cluster.rsplit("-", 1)[0]
        logger.info(f"Deleting UPI cluster {cluster_name}")
        proc = threading.Thread(
            target=cleanup, args=(cluster_name, cluster, True, failed_deletions)
        )
        proc.start()
        procs.append(proc)
    for p in procs:
        p.join()
    logger.info("Remaining clusters: %s", remaining_clusters)

    filename = "failed_cluster_deletions.txt"
    content = "None\n"
    if failed_deletions:
        logger.error("Failed cluster deletions: %s", failed_deletions)
        content = ""
        for cluster in failed_deletions:
            content += f"{cluster}\n"
    with open(filename, "w") as f:
        f.write(content)
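# Each cleanup thread above receives the shared failed_deletions list, so the
# worker is expected to append the cluster name when deletion fails. The real
# cleanup() is not part of this section; a minimal sketch of that contract,
# with the signature inferred from the calls above and the body assumed:
def cleanup(cluster_name, cluster, upi=False, failed_deletions=None):
    try:
        # Placeholder for the actual teardown (openshift-install destroy,
        # CloudFormation stack deletion for UPI clusters, etc.)
        destroy_cluster_resources(cluster_name, cluster, upi)
    except Exception:
        logger.exception("Failed to delete cluster %s", cluster_name)
        if failed_deletions is not None:
            # list.append is atomic under the GIL, so the shared list is safe
            failed_deletions.append(cluster_name)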