Example #1
    def __init__(self):
        self.name = self.__class__.__name__
        super(AWSIPI, self).__init__()
        force_download = (
            config.RUN['cli_params'].get('deploy')
            and config.DEPLOYMENT['force_download_installer']
        )
        self.installer = utils.get_openshift_installer(
            config.DEPLOYMENT['installer_version'],
            force_download=force_download
        )
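The force_download flag above is only truthy when a deployment was requested on the command line and the configuration explicitly asks for a fresh installer download. A tiny standalone illustration of that gating, with plain dicts standing in for the framework config:

# Stand-ins for config.RUN['cli_params'] and config.DEPLOYMENT.
cli_params = {'deploy': True}
deployment_config = {'force_download_installer': False}

force_download = (
    cli_params.get('deploy')
    and deployment_config['force_download_installer']
)
print(force_download)  # False: both flags must be truthy to force a re-download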
Example #2
    def download_installer(self):
        """
        Method to download installer

        Returns:
            str: path to the installer
        """
        force_download = (config.RUN['cli_params'].get('deploy')
                          and config.DEPLOYMENT['force_download_installer'])
        return utils.get_openshift_installer(
            config.DEPLOYMENT['installer_version'],
            force_download=force_download)
Example #3
    def download_installer(self):
        """
        Method to download installer

        Returns:
            str: path to the installer
        """
        force_download = (config.RUN["cli_params"].get("deploy")
                          and config.DEPLOYMENT["force_download_installer"])
        return utils.get_openshift_installer(
            config.DEPLOYMENT["installer_version"],
            force_download=force_download)
Example #4
    def destroy_cluster(self, log_level="DEBUG"):
        """
        Destroy OCP cluster specific to vSphere IPI

        Args:
            log_level (str): log level openshift-installer (default: DEBUG)
        """
        force_download = config.DEPLOYMENT["force_download_installer"]
        installer = get_openshift_installer(
            config.DEPLOYMENT["installer_version"], force_download=force_download
        )
        try:
            run_cmd(
                f"{installer} destroy cluster "
                f"--dir {self.cluster_path} "
                f"--log-level {log_level}",
                timeout=3600,
            )
        except CommandFailed as e:
            logger.error(e)
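The destroy step shells out to the installer binary with a one-hour timeout and logs any failure. A rough standard-library equivalent of that invocation is sketched below; the installer argument is whatever get_openshift_installer() returned, and the helper name is illustrative rather than part of the framework.

import subprocess


def destroy_cluster_sketch(installer, cluster_path, log_level="DEBUG"):
    # Illustrative equivalent of the run_cmd() call above: invoke the
    # openshift-install binary directly with a one-hour timeout.
    cmd = [
        installer, "destroy", "cluster",
        "--dir", cluster_path,
        "--log-level", log_level,
    ]
    try:
        subprocess.run(cmd, check=True, timeout=3600)
    except subprocess.CalledProcessError as exc:
        print(f"Cluster destroy failed: {exc}")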
Example #5
def aws_cleanup():
    parser = argparse.ArgumentParser(
        description='AWS overall resources cleanup according to running time')
    parser.add_argument('--hours',
                        type=hour_valid,
                        action='store',
                        required=True,
                        help="""
            Maximum running time of the cluster (in hours).
            Clusters older than this will be deleted.
            The minimum is 10 hours
            """)
    parser.add_argument(
        '--region',
        action='store',
        required=False,
        help="The name of the AWS region to delete the resources from")
    parser.add_argument('--prefix',
                        action='append',
                        required=False,
                        type=prefix_hour_mapping,
                        help="""
            Additional prefix:hour combo to treat as a special rule.
            Clusters starting with this prefix will only be cleaned up if
            their runtime exceeds the provided hour (this takes precedence
            over the value provided to --hours). Note: if you want to skip
            cleanup of a cluster entirely you can use 'never' for the hour.
            Example: --prefix foo:24 --prefix bar:48 --prefix foobar:never
            """)
    parser.add_argument('--force',
                        action='store_true',
                        required=False,
                        help="""
            Force cluster cleanup.
            User will not be prompted for confirmation.
            WARNING: this utility is destructive, only use this option if
            you know what you are doing.
            """)
    args = parser.parse_args()

    if not args.force:
        confirmation = input(
            'Careful! This action could be highly destructive. '
            'Are you sure you want to proceed? ')
        assert confirmation == defaults.CONFIRMATION_ANSWER, (
            "Wrong confirmation answer. Exiting")

    prefixes_hours_to_spare = defaults.CLUSTER_PREFIXES_SPECIAL_RULES

    if args.prefix:
        for prefix, hours in args.prefix:
            logger.info("Adding special rule for prefix '%s' with hours %s",
                        prefix, hours)
            prefixes_hours_to_spare.update({prefix: hours})

    time_to_delete = args.hours * 60 * 60
    region = defaults.AWS_REGION if not args.region else args.region
    clusters_to_delete, cloudformation_vpcs = get_clusters_to_delete(
        time_to_delete=time_to_delete,
        region_name=region,
        prefixes_hours_to_spare=prefixes_hours_to_spare,
    )

    if not clusters_to_delete:
        logger.info("No clusters to delete")
    else:
        logger.info("Deleting clusters: %s", clusters_to_delete)
        get_openshift_installer()
    procs = []
    for cluster in clusters_to_delete:
        cluster_name = cluster.rsplit('-', 1)[0]
        logger.info(f"Deleting cluster {cluster_name}")
        proc = threading.Thread(target=cleanup, args=(cluster_name, cluster))
        proc.start()
        procs.append(proc)
    for p in procs:
        p.join()
    if cloudformation_vpcs:
        logger.warning("The following cloudformation VPCs were found: %s",
                       cloudformation_vpcs)
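This variant relies on two argparse type callables, hour_valid and prefix_hour_mapping, whose implementations are not shown in the example. The following is only a hypothetical sketch of what such validators might look like, consistent with how args.hours and args.prefix are consumed above:

import argparse


def hour_valid(value):
    # Hypothetical validator enforcing the documented 10-hour minimum.
    hours = int(value)
    if hours < 10:
        raise argparse.ArgumentTypeError('The minimum is 10 hours')
    return hours


def prefix_hour_mapping(value):
    # Hypothetical parser for a 'prefix:hours' pair; 'never' marks a prefix
    # whose clusters should never be cleaned up.
    try:
        prefix, hours = value.split(':', 1)
    except ValueError:
        raise argparse.ArgumentTypeError('Expected the form prefix:hours')
    if hours != 'never':
        hours = int(hours)
    return prefix, hours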
Example #6
def cluster(request):
    log.info(f"All logs located at {log_path}")
    log.info("Running OCS basic installation")
    cluster_path = config.ENV_DATA['cluster_path']
    deploy = config.RUN['cli_params']['deploy']
    teardown = config.RUN['cli_params']['teardown']
    # Add a finalizer to teardown the cluster after test execution is finished
    if teardown:
        request.addfinalizer(cluster_teardown)
        log.info("Will teardown cluster because --teardown was provided")
    # Test cluster access; if the cluster is already running, skip the deployment.
    if is_cluster_running(cluster_path):
        log.info("The installation is skipped because the cluster is running")
        return
    elif teardown and not deploy:
        log.info("Attempting teardown of non-accessible cluster: %s",
                 cluster_path)
        return
    elif not deploy and not teardown:
        msg = "The given cluster cannot be connected to: {}. ".format(
            cluster_path)
        msg += "Provide a valid --cluster-path or use --deploy to deploy a new cluster"
        pytest.fail(msg)
    elif not system.is_path_empty(cluster_path) and deploy:
        msg = "The given cluster path is not empty: {}. ".format(cluster_path)
        msg += "Provide an empty --cluster-path and --deploy to deploy a new cluster"
        pytest.fail(msg)
    else:
        log.info(
            "A testing cluster will be deployed and cluster information stored at: %s",
            cluster_path)

    # Generate install-config from template
    log.info("Generating install-config")
    pull_secret_path = os.path.join(constants.TOP_DIR, "data", "pull-secret")

    # TODO: check for a supported platform and raise an exception if not
    # supported. Currently only AWS is supported.

    _templating = templating.Templating()
    install_config_str = _templating.render_template("install-config.yaml.j2",
                                                     config.ENV_DATA)
    # Log the install config *before* adding the pull secret, so we don't leak
    # sensitive data.
    log.info(f"Install config: \n{install_config_str}")
    # Parse the rendered YAML so that we can manipulate the object directly
    install_config_obj = yaml.safe_load(install_config_str)
    with open(pull_secret_path, "r") as f:
        # Parse, then unparse, the JSON file.
        # We do this for two reasons: to ensure it is well-formatted, and
        # also to ensure it ends up as a single line.
        install_config_obj['pullSecret'] = json.dumps(json.loads(f.read()))
    install_config_str = yaml.safe_dump(install_config_obj)
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(install_config_str)

    # Download installer
    installer = get_openshift_installer(config.DEPLOYMENT['installer_version'])
    # Download client
    get_openshift_client()

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(f"{installer} create cluster "
            f"--dir {cluster_path} "
            f"--log-level debug")

    # Test cluster access
    if not OCP.set_kubeconfig(
            os.path.join(cluster_path, config.RUN.get('kubeconfig_location'))):
        pytest.fail("Cluster is not available!")

    # TODO: Create cluster object, add to config.ENV_DATA for other tests to
    # utilize.
    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    create_ebs_volumes(worker_pattern, region_name=config.ENV_DATA['region'])

    # render templates and create resources
    create_oc_resource('common.yaml', cluster_path, _templating,
                       config.ENV_DATA)
    run_cmd(f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
            f'"openshift.io/cluster-monitoring=true"')
    run_cmd(f"oc policy add-role-to-user view "
            f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
            f"-n {config.ENV_DATA['cluster_namespace']}")
    apply_oc_resource('csi-nodeplugin-rbac_rbd.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/rbd/")
    apply_oc_resource('csi-provisioner-rbac_rbd.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/rbd/")
    apply_oc_resource('csi-nodeplugin-rbac_cephfs.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/cephfs/")
    apply_oc_resource('csi-provisioner-rbac_cephfs.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/cephfs/")
    # Increased to 15 seconds as 10 is not enough
    # TODO: do the sampler function and check if resource exist
    wait_time = 15
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource('operator-openshift-with-csi.yaml', cluster_path,
                       _templating, config.ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-operator "
            f"-n {config.ENV_DATA['cluster_namespace']} "
            f"--timeout=120s")
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-discover "
            f"-n {config.ENV_DATA['cluster_namespace']} "
            f"--timeout=120s")
    create_oc_resource('cluster.yaml', cluster_path, _templating,
                       config.ENV_DATA)

    POD = ocp.OCP(kind=constants.POD,
                  namespace=config.ENV_DATA['cluster_namespace'])
    CFS = ocp.OCP(kind=constants.CEPHFILESYSTEM,
                  namespace=config.ENV_DATA['cluster_namespace'])

    # Check for the Running status of Ceph Pods
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-agent "
            f"-n {config.ENV_DATA['cluster_namespace']} "
            f"--timeout=120s")
    assert POD.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-mon',
                                 resource_count=3,
                                 timeout=600)
    assert POD.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-mgr',
                                 timeout=600)
    assert POD.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-osd',
                                 resource_count=3,
                                 timeout=600)

    create_oc_resource('toolbox.yaml', cluster_path, _templating,
                       config.ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource('storage-manifest.yaml', cluster_path, _templating,
                       config.ENV_DATA)
    create_oc_resource("service-monitor.yaml", cluster_path, _templating,
                       config.ENV_DATA)
    create_oc_resource("prometheus-rules.yaml", cluster_path, _templating,
                       config.ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)

    # Create MDS pods for CephFileSystem
    fs_data = templating.load_yaml_to_dict(constants.CEPHFILESYSTEM_YAML)
    fs_data['metadata']['namespace'] = config.ENV_DATA['cluster_namespace']

    ceph_obj = OCS(**fs_data)
    ceph_obj.create()
    assert POD.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector='app=rook-ceph-mds',
                                 resource_count=2,
                                 timeout=600)

    # Check for CephFilesystem creation in ocp
    cfs_data = CFS.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']

    if helpers.validate_cephfilesystem(cfs_name):
        log.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        log.error("MDS deployment failed! Please check logs!")

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
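The fixture paces resource creation with fixed time.sleep() calls and leaves a TODO about replacing them with a sampler that checks whether the resource actually exists. A generic polling helper along those lines might look like the sketch below; the name and signature are illustrative, not part of the framework.

import time


def wait_for(check, timeout=120, interval=5):
    # Illustrative sampler: call check() until it returns a truthy value or
    # the timeout expires, instead of sleeping for a fixed wait_time.
    deadline = time.time() + timeout
    while time.time() < deadline:
        result = check()
        if result:
            return result
        time.sleep(interval)
    raise TimeoutError(f"Condition not met within {timeout} seconds")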
Example #7
def aws_cleanup():
    parser = argparse.ArgumentParser(
        description='AWS overall resources cleanup according to running time')
    parser.add_argument(
        '--hours',
        type=int,
        nargs=1,
        action='append',
        required=True,
        help="Maximum running time of the cluster (in hours). Clusters older "
             "than this will be deleted. The minimum is 10 hours")
    parser.add_argument(
        '--region',
        nargs=1,
        action='append',
        required=False,
        help="The name of the AWS region to delete the resources from")
    parser.add_argument(
        '--force',
        action='store_true',
        required=False,
        help="Force cluster cleanup. "
        "User will not be prompted for confirmation. "
        "WARNING: this utility is destructive, only use this option if "
        "you know what you are doing.")
    parser.add_argument('--skip-prefixes',
                        action='store_true',
                        required=False,
                        help="Skip prompt for additional prefixes to spare")
    logging.basicConfig(level=logging.DEBUG)
    args = parser.parse_args()

    if not args.force:
        confirmation = input(
            'Careful! This action could be highly destructive. '
            'Are you sure you want to proceed? ')
        assert confirmation == defaults.CONFIRMATION_ANSWER, (
            "Wrong confirmation answer. Exiting")

    prefixes_hours_to_spare = defaults.CLUSTER_PREFIXES_TO_EXCLUDE_FROM_DELETION

    if not args.skip_prefixes:
        prefixes_hours = input(
            "Press Enter if there are no cluster prefixes to spare.\n"
            "If you would like the cleanup to spare specific cluster prefixes, "
            "please enter them along with the time allowed for these to be kept "
            "running, in a dictionary representation.\nAn example: "
            "{'prefix1': 36, 'prefix2': 48} ")
        if prefixes_hours:
            prefixes_hours_to_spare = eval(prefixes_hours)
    time_to_delete = args.hours[0][0]
    assert time_to_delete > defaults.MINIMUM_CLUSTER_RUNNING_TIME_FOR_DELETION, (
        "Number of hours is lower than the required minimum. Exiting")
    time_to_delete = time_to_delete * 60 * 60
    region = defaults.AWS_REGION if not args.region else args.region[0][0]
    clusters_to_delete, cloudformation_vpcs = get_clusters_to_delete(
        time_to_delete=time_to_delete,
        region_name=region,
        prefixes_hours_to_spare=prefixes_hours_to_spare,
    )

    if not clusters_to_delete:
        logger.info("No clusters to delete")
    else:
        get_openshift_installer()
    procs = []
    for cluster in clusters_to_delete:
        cluster_name = cluster.rsplit('-', 1)[0]
        logger.info(f"Deleting cluster {cluster_name}")
        proc = threading.Thread(target=cleanup, args=(cluster_name, cluster))
        proc.start()
        procs.append(proc)
    for p in procs:
        p.join()
    if cloudformation_vpcs:
        logger.warning("The following cloudformation VPCs were found: %s",
                       cloudformation_vpcs)
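This older variant parses the interactively entered prefix dictionary with eval(), which executes arbitrary expressions. A safer pattern, shown here only as a suggestion and not as the project's code, is ast.literal_eval, which accepts plain Python literals and rejects everything else:

import ast

prefixes_hours = input(
    'Press Enter if there are no cluster prefixes to spare, or enter a '
    "dictionary such as {'prefix1': 36, 'prefix2': 48}: ")
# literal_eval only evaluates literals (dicts, strings, numbers, ...), so a
# malformed or malicious entry raises ValueError/SyntaxError instead of
# being executed.
prefixes_hours_to_spare = ast.literal_eval(prefixes_hours) if prefixes_hours else {}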
Example #8
def aws_cleanup():
    parser = argparse.ArgumentParser(
        description="AWS overall resources cleanup according to running time")
    parser.add_argument(
        "--hours",
        type=hour_valid,
        action="store",
        required=True,
        help="""
            Maximum running time of the cluster (in hours).
            Clusters older than this will be deleted.
            The minimum is 10 hours
            """,
    )
    parser.add_argument(
        "--region",
        action="store",
        required=False,
        help="The name of the AWS region to delete the resources from",
    )
    parser.add_argument(
        "--prefix",
        action="append",
        required=False,
        type=prefix_hour_mapping,
        help="""
            Additional prefix:hour combo to treat as a special rule.
            Clusters starting with this prefix will only be cleaned up if
            their runtime exceeds the provided hour (this takes precedence
            over the value provided to --hours). Note: if you want to skip
            cleanup of a cluster entirely you can use 'never' for the hour.
            Example: --prefix foo:24 --prefix bar:48 --prefix foobar:never
            """,
    )
    parser.add_argument(
        "--force",
        action="store_true",
        required=False,
        help="""
            Force cluster cleanup.
            User will not be prompted for confirmation.
            WARNING: this utility is destructive, only use this option if
            you know what you are doing.
            """,
    )
    args = parser.parse_args()

    if not args.force:
        confirmation = input(
            "Careful! This action could be highly destructive. "
            "Are you sure you want to proceed? ")
        assert (confirmation == defaults.CONFIRMATION_ANSWER
                ), "Wrong confirmation answer. Exiting"

    prefixes_hours_to_spare = defaults.CLUSTER_PREFIXES_SPECIAL_RULES

    if args.prefix:
        for prefix, hours in args.prefix:
            logger.info("Adding special rule for prefix '%s' with hours %s",
                        prefix, hours)
            prefixes_hours_to_spare.update({prefix: hours})

    time_to_delete = args.hours * 60 * 60
    region = defaults.AWS_REGION if not args.region else args.region
    clusters_to_delete, cf_clusters_to_delete, remaining_clusters = get_clusters(
        time_to_delete=time_to_delete,
        region_name=region,
        prefixes_hours_to_spare=prefixes_hours_to_spare,
    )

    if not clusters_to_delete:
        logger.info("No clusters to delete")
    else:
        logger.info("Deleting clusters: %s", clusters_to_delete)
        get_openshift_installer()
    procs = []
    failed_deletions = []
    for cluster in clusters_to_delete:
        cluster_name = cluster.rsplit("-", 1)[0]
        logger.info(f"Deleting cluster {cluster_name}")
        proc = threading.Thread(target=cleanup,
                                args=(cluster_name, cluster, False,
                                      failed_deletions))
        proc.start()
        procs.append(proc)
    for p in procs:
        p.join()
    for cluster in cf_clusters_to_delete:
        cluster_name = cluster.rsplit("-", 1)[0]
        logger.info(f"Deleting UPI cluster {cluster_name}")
        proc = threading.Thread(target=cleanup,
                                args=(cluster_name, cluster, True,
                                      failed_deletions))
        proc.start()
        procs.append(proc)
    for p in procs:
        p.join()
    logger.info("Remaining clusters: %s", remaining_clusters)
    filename = "failed_cluster_deletions.txt"
    content = "None\n"
    if failed_deletions:
        logger.error("Failed cluster deletions: %s", failed_deletions)
        content = ""
        for cluster in failed_deletions:
            content += f"{cluster}\n"
    with open(filename, "w") as f:
        f.write(content)
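The deletion fan-out starts one thread per cluster and collects failures in a shared list, joining the IPI batch before the UPI (CloudFormation) batch. A compact equivalent using concurrent.futures is sketched below; the cleanup(cluster_name, cluster, upi, failed_deletions) signature is taken from the example, while everything else is illustrative.

from concurrent.futures import ThreadPoolExecutor


def delete_clusters(clusters, cleanup, upi=False):
    # Illustrative fan-out: run cleanup() for each cluster in parallel and
    # return the names recorded in the shared failed_deletions list.
    failed_deletions = []
    with ThreadPoolExecutor() as pool:
        futures = [
            pool.submit(cleanup, cluster.rsplit("-", 1)[0], cluster, upi,
                        failed_deletions)
            for cluster in clusters
        ]
        for future in futures:
            future.result()  # propagate unexpected exceptions
    return failed_deletions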