Example #1
def test_ceph_rgw_metrics_after_metrics_exporter_respin(rgw_deployments):
    """
    RGW metrics should be provided via OCP Prometheus even after
    ocs-metrics-exporter pod is respinned.

    """
    logger.info("Respin ocs-metrics-exporter pod")
    pod_obj = ocp.OCP(kind=constants.POD,
                      namespace=defaults.ROOK_CLUSTER_NAMESPACE)
    metrics_pods = pod_obj.get(
        selector="app.kubernetes.io/name=ocs-metrics-exporter")["items"]
    assert len(metrics_pods) == 1
    metrics_pod_data = metrics_pods[0]
    metrics_pod = OCS(**metrics_pod_data)
    metrics_pod.delete(force=True)

    logger.info("Wait for ocs-metrics-exporter pod to come up")
    assert pod_obj.wait_for_resource(
        condition="Running",
        selector="app.kubernetes.io/name=ocs-metrics-exporter",
        resource_count=1,
        timeout=600,
    )

    logger.info("Collect RGW metrics")
    prometheus = PrometheusAPI()
    list_of_metrics_without_results = metrics.get_missing_metrics(
        prometheus, metrics.ceph_rgw_metrics)
    msg = (
        "OCS Monitoring should provide some value(s) for tested rgw metrics, "
        "so that the list of metrics without results is empty.")
    assert list_of_metrics_without_results == [], msg
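For readers unfamiliar with the helper, here is a minimal sketch of what metrics.get_missing_metrics presumably does, assuming PrometheusAPI.query(metric) returns the list of result samples for that metric (the function below is illustrative, not the ocs-ci implementation):

def get_metrics_without_results(prometheus, metric_names):
    # Query every metric and keep the ones that return no samples at all.
    missing = []
    for metric in metric_names:
        if not prometheus.query(metric):
            missing.append(metric)
    return missing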
Example #2
    def test_basics_rbd(self, test_fixture):
        """
        Testing basics: secret creation,
        storage class creation and pvc with cephfs
        """
        self.cephfs_secret = templating.load_yaml_to_dict(
            constants.CSI_CEPHFS_SECRET_YAML)
        del self.cephfs_secret['data']['userID']
        del self.cephfs_secret['data']['userKey']
        self.cephfs_secret['data']['adminKey'] = (
            get_admin_key_from_ceph_tools())
        self.cephfs_secret['data']['adminID'] = constants.ADMIN_BASE64
        logging.info(self.cephfs_secret)
        secret = OCS(**self.cephfs_secret)
        secret.create()
        self.cephfs_sc = templating.load_yaml_to_dict(
            constants.CSI_CEPHFS_STORAGECLASS_YAML)
        self.cephfs_sc['parameters']['monitors'] = self.mons
        self.cephfs_sc['parameters']['pool'] = (
            f"{self.fs_data['metadata']['name']}-data0")
        storage_class = OCS(**self.cephfs_sc)
        storage_class.create()
        self.cephfs_pvc = templating.load_yaml_to_dict(
            constants.CSI_CEPHFS_PVC_YAML)
        pvc = PVC(**self.cephfs_pvc)
        pvc.create()
        log.info(pvc.status)
        assert 'Bound' in pvc.status
        pvc.delete()
        storage_class.delete()
        secret.delete()
Example #3
    def test_basics_cephfs(self):
        """
        Testing basics: secret creation,
        storage class creation and pvc with rbd
        """
        self.rbd_secret = templating.load_yaml_to_dict(
            constants.CSI_RBD_SECRET_YAML)
        del self.rbd_secret['data']['kubernetes']
        self.rbd_secret['data']['admin'] = get_admin_key_from_ceph_tools()
        logging.info(self.rbd_secret)
        secret = OCS(**self.rbd_secret)
        secret.create()
        self.rbd_sc = templating.load_yaml_to_dict(
            constants.CSI_RBD_STORAGECLASS_YAML)
        self.rbd_sc['parameters']['monitors'] = self.mons
        del self.rbd_sc['parameters']['userid']
        storage_class = OCS(**self.rbd_sc)
        storage_class.create()
        self.rbd_pvc = templating.load_yaml_to_dict(constants.CSI_RBD_PVC_YAML)
        pvc = PVC(**self.rbd_pvc)
        pvc.create()
        assert 'Bound' in pvc.status
        pvc.delete()
        storage_class.delete()
        secret.delete()
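A small aside on the secret handling above: the 'data' fields of a Kubernetes Secret must be base64-encoded, which is presumably what constants.ADMIN_BASE64 holds for the 'admin' user. For illustration:

import base64

# "admin" base64-encoded, the kind of value expected in Secret 'data' fields
# (illustrative; the actual constant lives in the ocs-ci constants module).
admin_b64 = base64.b64encode(b"admin").decode("ascii")
assert admin_b64 == "YWRtaW4="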
Example #4
def invalid_storageclass(request):
    """
    Creates a CephFS or RBD StorageClass with invalid parameters.

    Storageclass is removed at the end of test.

    Returns:
        str: Name of created StorageClass
    """
    logger.info(f"SETUP - creating storageclass "
                f"{request.param['values']['storageclass_name']}")
    yaml_path = os.path.join(request.param['template_dir'],
                             "storageclass.yaml")
    with open(yaml_path, 'r') as fd:
        yaml_data = yaml.safe_load(fd)
    yaml_data.update(request.param['values'])
    storageclass = OCS(**yaml_data)
    sc_data = storageclass.create()

    logger.debug('Check that storageclass has assigned creationTimestamp')
    assert sc_data['metadata']['creationTimestamp']

    yield sc_data

    logger.info(f"TEARDOWN - removing storageclass "
                f"{request.param['values']['storageclass_name']}")
    storageclass.delete()
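The fixture above reads its inputs from request.param, so it is meant to be parametrized indirectly; a hypothetical parametrization (paths and values are illustrative only):

import pytest

@pytest.mark.parametrize(
    "invalid_storageclass",
    [
        {
            "template_dir": "templates/CSI/rbd",      # illustrative path
            "values": {
                "storageclass_name": "invalid-rbd-sc",
                "pool": "does-not-exist",             # invalid on purpose
            },
        },
    ],
    indirect=True,
)
def test_pvc_creation_fails(invalid_storageclass):
    # invalid_storageclass is the sc_data dict yielded by the fixture
    ...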
Example #5
    def teardown(self):
        """
        Delete objects created in roughly reverse order of how they were created.

        """
        self.cb_examples.delete()
        self.cb_worker.delete()
        self.cb_deploy.delete()
        self.pod_obj.exec_oc_cmd(
            command="delete rolebinding couchbase-operator-rolebinding")
        self.pod_obj.exec_oc_cmd(
            command="delete serviceaccount couchbase-operator")
        self.operator_role.delete()
        self.couchbase_obj.delete()
        switch_to_project('default')
        self.pod_obj.delete_project(constants.COUCHBASE_OPERATOR)
        for adm_yaml in self.admission_parts:
            adm_data = templating.load_yaml(adm_yaml)
            adm_obj = OCS(**adm_data)
            adm_obj.delete()
        # Before the code below was added, the teardown task would sometimes
        # fail with the leftover objects because it would still see one of the
        # couchbase pods.
        for admin_pod in TimeoutSampler(self.WAIT_FOR_TIME, 3,
                                        get_pod_name_by_pattern, 'couchbase',
                                        'default'):
            if admin_pod:
                continue
            else:
                break
        PillowFight.cleanup(self)
        switch_to_default_rook_cluster_project()
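TimeoutSampler, used above and in several examples below, is an iterator that keeps calling the supplied function every few seconds until the caller breaks out or the timeout expires; a rough sketch of the idea (not the ocs-ci implementation):

import time

class SimpleSampler:
    """Rough stand-in for a TimeoutSampler-style poller (illustrative only)."""

    def __init__(self, timeout, sleep, func, *func_args):
        self.timeout = timeout
        self.sleep = sleep
        self.func = func
        self.func_args = func_args

    def __iter__(self):
        deadline = time.time() + self.timeout
        while time.time() < deadline:
            # The caller inspects each yielded result and breaks when satisfied.
            yield self.func(*self.func_args)
            time.sleep(self.sleep)
        raise TimeoutError("sampled function never returned a satisfying result")

In the teardown above, the loop breaks only once get_pod_name_by_pattern stops finding any couchbase pods in the 'default' namespace.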
Example #6
    def test_fio_workload_simple(self, ripsaw, interface, io_pattern):
        """
        This is a basic fio perf test
        """
        # Deployment ripsaw
        log.info("Deploying ripsaw operator")
        ripsaw.apply_crd(
            'resources/crds/'
            'ripsaw_v1alpha1_ripsaw_crd.yaml'
        )
        sc = 'ocs-storagecluster-ceph-rbd' if interface == 'CephBlockPool' else 'ocs-storagecluster-cephfs'

        # Create fio benchmark
        log.info("Create resource file for fio workload")
        fio_cr = templating.load_yaml(constants.FIO_CR_YAML)
        # Todo: have pvc_size set to 'get_osd_pods_memory_sum * 5'
        #  once pr-2037 is merged
        fio_cr['spec']['clustername'] = config.ENV_DATA['platform'] + get_build() + get_ocs_version()
        fio_cr['spec']['test_user'] = get_ocs_version() + interface + io_pattern
        fio_cr['spec']['workload']['args']['storageclass'] = sc
        if io_pattern == 'sequential':
            fio_cr['spec']['workload']['args']['jobs'] = ['write', 'read']
        log.info(f'fio_cr: {fio_cr}')
        fio_cr_obj = OCS(**fio_cr)
        fio_cr_obj.create()

        # Wait for fio client pod to be created
        for fio_pod in TimeoutSampler(
            300, 20, get_pod_name_by_pattern, 'fio-client', 'my-ripsaw'
        ):
            try:
                if fio_pod[0] is not None:
                    fio_client_pod = fio_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        # Wait for the fio pod to initialize and complete
        log.info("Waiting for fio_client to complete")
        pod_obj = OCP(kind='pod')
        pod_obj.wait_for_resource(
            condition='Completed',
            resource_name=fio_client_pod,
            timeout=18000,
            sleep=300,
        )

        output = run_cmd(f'oc logs {fio_client_pod}')

        if 'Fio failed to execute' not in output:
            log.info("FIO has completed successfully")
        else:
            log.info("FIO failed to complete")

        # Clean up fio benchmark
        log.info("Deleting FIO benchmark")
        fio_cr_obj.delete()
        analyze_regression(io_pattern, sc, es_username=fio_cr['spec']['test_user'])
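Pieced together from the assignments above, the parts of the fio benchmark CR that the test overrides look roughly like this (values are illustrative; the rest of the CR comes from constants.FIO_CR_YAML):

fio_cr_overrides = {
    "spec": {
        # platform + build + OCS version, e.g. "AWS" + "4.5.0-543" + "4.5"
        "clustername": "AWS4.5.0-5434.5",
        # OCS version + interface + IO pattern
        "test_user": "4.5CephBlockPoolsequential",
        "workload": {
            "args": {
                "storageclass": "ocs-storagecluster-ceph-rbd",
                "jobs": ["write", "read"],  # only set for the sequential pattern
            }
        },
    }
}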
Example #7
    def test_sql_workload_simple(self, ripsaw):
        """
        This is a basic pgsql workload
        """
        # Deployment postgres
        log.info("Deploying postgres database")
        ripsaw.apply_crd('resources/crds/' 'ripsaw_v1alpha1_ripsaw_crd.yaml')
        ripsaw.setup_postgresql()

        # Create pgbench benchmark
        log.info("Create resource file for pgbench workload")
        pg_data = templating.load_yaml(constants.PGSQL_BENCHMARK_YAML)
        pg_obj = OCS(**pg_data)
        pg_obj.create()

        # Wait for pgbench pod to be created
        for pgbench_pod in TimeoutSampler(300, 3, get_pod_name_by_pattern,
                                          'pgbench-1-dbs-client', 'my-ripsaw'):
            try:
                if pgbench_pod[0] is not None:
                    pgbench_client_pod = pgbench_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        # Wait for the pgbench pod to initialize and complete
        log.info("Waiting for pgbench_client to complete")
        pod_obj = OCP(kind='pod')
        pod_obj.wait_for_resource(
            condition='Completed',
            resource_name=pgbench_client_pod,
            timeout=800,
            sleep=10,
        )

        # Running pgbench and parsing logs
        output = run_cmd(f'oc logs {pgbench_client_pod}')
        pg_output = utils.parse_pgsql_logs(output)
        log.info("*******PGBench output log*********\n" f"{pg_output}")
        for data in pg_output:
            latency_avg = data['latency_avg']
            if not latency_avg:
                raise UnexpectedBehaviour("PGBench failed to run, "
                                          "no data found on latency_avg")
        log.info("PGBench has completed successfully")

        # Clean up pgbench benchmark
        log.info("Deleting PG bench benchmark")
        pg_obj.delete()
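utils.parse_pgsql_logs turns the pgbench output into a list of dicts that include latency_avg; a minimal sketch of extracting that one number, assuming the standard pgbench summary line "latency average = <N> ms" (an illustrative helper, not the ocs-ci parser):

import re

def extract_latency_avg(pgbench_log):
    # pgbench prints a summary line such as "latency average = 2.345 ms"
    match = re.search(r"latency average\s*=\s*([\d.]+)\s*ms", pgbench_log)
    return float(match.group(1)) if match else None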
Example #8
    def test_sql_workload_simple(self, ripsaw):
        """
        This is a basic pgsql workload
        """
        # Deployment postgres
        log.info("Deploying postgres database")
        ripsaw.apply_crd('resources/crds/' 'ripsaw_v1alpha1_ripsaw_crd.yaml')
        ripsaw.setup_postgresql()
        run_cmd('bin/oc wait --for condition=ready pod '
                '-l app=postgres '
                '--timeout=120s')

        # Create pgbench benchmark
        log.info("Create resource file for pgbench workload")
        pg_data = templating.load_yaml_to_dict(constants.PGSQL_BENCHMARK_YAML)
        pg_obj = OCS(**pg_data)
        pg_obj.create()
        # Wait for pgbench pod to be created
        log.info("waiting for pgbench benchmark to create, "
                 f"PGbench pod name: {pg_obj.name} ")
        wait_time = 30
        log.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)

        pgbench_pod = run_cmd('bin/oc get pods -l '
                              'app=pgbench-client -o name')
        pgbench_pod = pgbench_pod.split('/')[1]
        run_cmd('bin/oc wait --for condition=Initialized '
                f'pods/{pgbench_pod} '
                '--timeout=60s')
        run_cmd('bin/oc wait --for condition=Complete jobs '
                '-l app=pgbench-client '
                '--timeout=300s')

        # Running pgbench and parsing logs
        output = run_cmd(f'bin/oc logs {pgbench_pod}')
        pg_output = utils.parse_pgsql_logs(output)
        log.info("*******PGBench output log*********\n" f"{pg_output}")
        for data in pg_output:
            latency_avg = data['latency_avg']
            if not latency_avg:
                raise UnexpectedBehaviour("PGBench failed to run, "
                                          "no data found on latency_avg")
        log.info("PGBench has completed successfully")

        # Clean up pgbench benchmark
        log.info("Deleting PG bench benchmark:")
        pg_obj.delete()
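A small design note: the fixed 30-second sleep above could be replaced by polling, in the same style the other examples use; a sketch assuming TimeoutSampler and get_pod_name_by_pattern are imported as elsewhere in these examples (namespace taken from Example #7):

# Poll every 5 seconds, for at most 120 seconds, until a pgbench-client pod exists.
for pods in TimeoutSampler(120, 5, get_pod_name_by_pattern,
                           'pgbench-client', 'my-ripsaw'):
    if pods:
        pgbench_pod = pods[0]
        break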
Example #9
    def test_verify_all_fields_in_sc_yaml_with_oc_describe(self, interface):
        """
        Test function to create RBD and CephFS SC, and match with oc describe sc
        output
        """
        log.info(f"Creating a {interface} storage class")
        self.sc_data = templating.load_yaml(
            getattr(constants, f"CSI_{interface}_STORAGECLASS_YAML"))
        self.sc_data['metadata']['name'] = (
            helpers.create_unique_resource_name('test',
                                                f'csi-{interface.lower()}'))
        global SC_OBJ
        SC_OBJ = OCS(**self.sc_data)
        assert SC_OBJ.create()
        log.info(
            f"{interface}Storage class: {SC_OBJ.name} created successfully")
        log.info(self.sc_data)

        # Get oc describe sc output
        describe_out = SC_OBJ.get("sc")
        log.info(describe_out)

        # Confirm that the sc yaml details match the oc describe sc output
        value = {
            k: describe_out[k]
            for k in set(describe_out) - set(self.sc_data)
        }
        assert len(value) == 1 and value['volumeBindingMode'] == 'Immediate', (
            "OC describe sc output didn't match storage class yaml")
        log.info("OC describe sc output matches storage class yaml")
        # Delete Storage Class
        log.info(f"Deleting Storageclass: {SC_OBJ.name}")
        assert SC_OBJ.delete()
        log.info(f"Storage Class: {SC_OBJ.name} deleted successfully")
        del SC_OBJ
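The dict comprehension above collects the keys that appear in the live object but not in the source yaml; a tiny worked example of the same set-difference idiom:

live = {"provisioner": "rbd.csi.ceph.com", "reclaimPolicy": "Delete",
        "volumeBindingMode": "Immediate"}
source = {"provisioner": "rbd.csi.ceph.com", "reclaimPolicy": "Delete"}
extra = {k: live[k] for k in set(live) - set(source)}
assert extra == {"volumeBindingMode": "Immediate"}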
Example #10
    def cleanup(self):
        """
        Remove pillowfight pods and temp files

        """
        pf_files = listdir(constants.TEMPLATE_PILLOWFIGHT_DIR)
        for pf_yaml in pf_files:
            pf_fullpath = join(constants.TEMPLATE_PILLOWFIGHT_DIR, pf_yaml)
            if not pf_fullpath.endswith('.yaml'):
                continue
            if not isfile(pf_fullpath):
                continue
            pfight = templating.load_yaml(pf_fullpath)
            lpillowfight = OCS(**pfight)
            try:
                lpillowfight.delete()
            except CommandFailed:
                log.info(f"{pf_fullpath} object is already deleted")
        rmtree(self.logs)
Example #11
    def cleanup(self):
        """
        Remove pillowfight pods and temp files

        """
        pf_files = listdir(constants.TEMPLATE_PILLOWFIGHT_DIR)
        for pf_yaml in pf_files:
            pf_fullpath = join(constants.TEMPLATE_PILLOWFIGHT_DIR, pf_yaml)
            if not pf_fullpath.endswith('.yaml'):
                continue
            if not isfile(pf_fullpath):
                continue
            pfight = templating.load_yaml(pf_fullpath)
            lpillowfight = OCS(**pfight)
            try:
                lpillowfight.delete()
            except CommandFailed:
                log.info(f"{pf_fullpath} object is already deleted")
        rmtree(self.logs)
        nsinfo = self.pod_obj.exec_oc_cmd(command="get namespace")
        if self.COUCHBASE_OPERATOR in nsinfo:
            self.pod_obj.exec_oc_cmd(
                command=f"delete namespace {self.COUCHBASE_OPERATOR}")
Example #12
class SmallFiles(BenchmarkOperator):
    """
    Small_Files workload benchmark
    """
    def __init__(self, es, **kwargs):
        """
        Initializer function

        Args:
            es (obj): elastic search instance object

        """
        self.es = es
        self.dev_mode = config.RUN["cli_params"].get("dev_mode")
        super().__init__(**kwargs)

        # Loading the main template yaml file for the benchmark
        log.info("Loading the CRD Template file")
        self.crd_data = templating.load_yaml(
            constants.SMALLFILE_BENCHMARK_YAML)
        assert (self._setup_elasticsearch()
                ), "Can not execute the workload without ES server"
        self.deploy()

    def _setup_elasticsearch(self):
        """
        Setting up the elastic search parameters in the CRD object.

        Return:
            bool : True if there is ES to connect, False otherwise

        """
        log.info("Setting up the elasticsearch configuration")
        self.crd_data["spec"]["elasticsearch"] = {}
        if not self.dev_mode and config.PERF.get("production_es"):
            log.info("Setting ES to production !")
            self.crd_data["spec"]["elasticsearch"] = {
                "server": config.PERF.get("production_es_server"),
                "port": config.PERF.get("production_es_port"),
            }
        elif self.dev_mode and config.PERF.get("dev_lab_es"):
            log.info("Setting ES to development one !")
            self.crd_data["spec"]["elasticsearch"] = {
                "server": config.PERF.get("dev_es_server"),
                "port": config.PERF.get("dev_es_port"),
            }

        if not self.crd_data["spec"]["elasticsearch"] == {}:
            self.crd_data["spec"]["elasticsearch"][
                "url"] = "http://{}:{}".format(
                    self.crd_data["spec"]["elasticsearch"]["server"],
                    self.crd_data["spec"]["elasticsearch"]["port"],
                )
            self.crd_data["spec"]["elasticsearch"]["parallel"] = True

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.backup_es = self.crd_data["spec"]["elasticsearch"]

        # Use the internally defined elastic-search server in the test - if it exists
        if self.es:
            self.crd_data["spec"]["elasticsearch"] = {
                "url": f"http://{self.es.get_ip()}:{self.es.get_port()}",
                "server": self.es.get_ip(),
                "port": self.es.get_port(),
                "parallel": True,
            }
        if self.crd_data["spec"]["elasticsearch"] == {}:
            log.error(
                "No ElasticSearch server is available. workload can not be execute"
            )
            return False

        return True

    def setup_storageclass(self, interface):
        """
        Setting up the storageclass parameter in the CRD object

        Args:
            interface (str): the storage interface

        """
        if interface == constants.CEPHBLOCKPOOL:
            storageclass = constants.DEFAULT_STORAGECLASS_RBD
        else:
            storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
        log.info(f"Using {storageclass} Storageclass")
        self.crd_data["spec"]["workload"]["args"][
            "storageclass"] = storageclass

    def setup_test_params(self, file_size, files, threads, samples):
        """
        Setting up the parameters for this test

        Args:
            file_size (int): the file size in KB
            files (int): number of file to use in the test
            threads (int): number of threads to use in the test
            samples (int): number of sample to run the test

        """
        self.crd_data["spec"]["workload"]["args"]["file_size"] = file_size
        self.crd_data["spec"]["workload"]["args"]["files"] = files
        self.crd_data["spec"]["workload"]["args"]["threads"] = threads
        self.crd_data["spec"]["workload"]["args"]["samples"] = samples

    def setup_vol_size(self, file_size, files, threads, total_capacity):
        """
        Calculate the size of the volume to be tested. It should be
        comfortably larger than the total size of the files (a factor of
        three is used as a safety margin), and at least 100 Gi.

        Since file_size is given in KB and the volume size must be expressed
        in Gi, a unit conversion is needed.

        Args:
            file_size (int): the file size in KB
            files (int): number of file to use in the test
            threads (int): number of threads to use in the test
            total_capacity (int): The total usable storage capacity in GiB

        """
        vol_size = int(files * threads * file_size * 3)
        vol_size = int(vol_size / constants.GB2KB)
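        # Worked example with hypothetical numbers: 500,000 files * 4 threads
        # * 16 KB * 3 = 96,000,000 KB, i.e. roughly 91-96 Gi after the division
        # above (depending on how GB2KB is defined), which the check below then
        # raises to the 100 Gi floor.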
        if vol_size < 100:
            vol_size = 100
        errmsg = ("There is not enough storage to run the test. "
                  f"Storage capacity : {total_capacity:,.2f} GiB, "
                  f"Needed capacity is more then {vol_size:,.2f} GiB")
        assert vol_size < total_capacity, errmsg
        self.crd_data["spec"]["workload"]["args"][
            "storagesize"] = f"{vol_size}Gi"

    def setup_operations(self, ops):
        """
        Setting up the test operations

        Args:
            ops : can be list of operations or a string of one operation

        """
        if isinstance(ops, list):
            self.crd_data["spec"]["workload"]["args"]["operation"] = ops
        elif isinstance(ops, str):
            self.crd_data["spec"]["workload"]["args"]["operation"] = [ops]

    def run(self):
        """
        Run the benchmark and wait until it completed

        """
        # Create the benchmark object
        self.sf_obj = OCS(**self.crd_data)
        self.sf_obj.create()

        # Wait for benchmark pods to get created - takes a while
        for bench_pod in TimeoutSampler(
                240,
                10,
                get_pod_name_by_pattern,
                "smallfile-client",
                benchmark_operator.BMO_NAME,
        ):
            try:
                if bench_pod[0] is not None:
                    small_file_client_pod = bench_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        bench_pod = OCP(kind="pod", namespace=benchmark_operator.BMO_NAME)
        log.info("Waiting for SmallFile benchmark to Run")
        assert bench_pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_name=small_file_client_pod,
            sleep=30,
            timeout=600,
        )
        log.info("The SmallFiles benchmark is running, wait for completion")
        bench_pod.wait_for_resource(
            condition=constants.STATUS_COMPLETED,
            resource_name=small_file_client_pod,
            timeout=18000,
            sleep=60,
        )
        log.info("The SmallFiles benchmark is completed")

    def delete(self):
        """
        Delete the benchmark

        """
        log.info("Deleting The Small Files benchmark")
        self.sf_obj.delete()
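A hypothetical driver for the class above, calling its setup methods in the order their signatures suggest (all values illustrative; `es` is assumed to be an elastic search instance object, as the initializer expects):

sf = SmallFiles(es=es)                          # deploys the benchmark operator
sf.setup_storageclass(constants.CEPHBLOCKPOOL)  # picks the RBD storage class
sf.setup_test_params(file_size=16, files=500000, threads=4, samples=3)
sf.setup_vol_size(file_size=16, files=500000, threads=4, total_capacity=1024)
sf.setup_operations(["create", "read"])         # smallfile operations to run
sf.run()
sf.delete()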
Example #13
class AMQ(object):
    """
    Workload operation using AMQ
    """

    def __init__(self, **kwargs):
        """
        Initializer function

        Args:
            kwargs (dict):
                Following kwargs are valid
                namespace: namespace for the operator
                repo: AMQ repo where all necessary yaml file are there - a github link
                branch: branch to use from the repo
        """
        self.args = kwargs
        self.repo = self.args.get("repo", constants.KAFKA_OPERATOR)
        self.branch = self.args.get("branch", "master")
        self.ocp = OCP()
        self.ns_obj = OCP(kind="namespace")
        self.pod_obj = OCP(kind="pod")
        self.kafka_obj = OCP(kind="Kafka")
        self.kafka_connect_obj = OCP(kind="KafkaConnect")
        self.kafka_bridge_obj = OCP(kind="KafkaBridge")
        self.kafka_topic_obj = OCP(kind="KafkaTopic")
        self.kafka_user_obj = OCP(kind="KafkaUser")
        self.amq_is_setup = False
        self.messaging = False
        self.benchmark = False
        self.consumer_pod = self.producer_pod = None
        self.kafka_topic = self.kafka_user = None
        self.kafka_connect = self.kafka_bridge = self.kafka_persistent = None
        self.dir = tempfile.mkdtemp(prefix="amq_")
        self._clone_amq()

    def _clone_amq(self):
        """
        clone the amq repo
        """
        try:
            log.info(f"cloning amq in {self.dir}")
            git_clone_cmd = f"git clone {self.repo} "
            run(git_clone_cmd, shell=True, cwd=self.dir, check=True)
            self.amq_dir = "strimzi-kafka-operator/packaging/install/cluster-operator/"
            self.amq_kafka_pers_yaml = (
                "strimzi-kafka-operator/packaging/examples/kafka/kafka-persistent.yaml"
            )
            self.amq_kafka_connect_yaml = (
                "strimzi-kafka-operator/packaging/examples/connect/kafka-connect.yaml"
            )
            self.amq_kafka_bridge_yaml = (
                "strimzi-kafka-operator/packaging/examples/bridge/kafka-bridge.yaml"
            )
            self.kafka_topic_yaml = (
                "strimzi-kafka-operator/packaging/examples/topic/kafka-topic.yaml"
            )
            self.kafka_user_yaml = (
                "strimzi-kafka-operator/packaging/examples/user/kafka-user.yaml"
            )
            self.hello_world_producer_yaml = constants.HELLO_WORLD_PRODUCER_YAML
            self.hello_world_consumer_yaml = constants.HELLO_WORLD_CONSUMER_YAML

        except (CommandFailed, CalledProcessError) as cf:
            log.error("Error during cloning of amq repository")
            raise cf

    def create_namespace(self, namespace):
        """
        create namespace for amq

        Args:
            namespace (str): Namespace for amq pods
        """
        self.ocp.new_project(namespace)

    def setup_amq_cluster_operator(self, namespace=constants.AMQ_NAMESPACE):
        """
        Function to set up the amq cluster operator.
        The files are pulled from the GitHub repo,
        and the function makes sure the cluster-operator pod is running.

        Args:
            namespace (str): Namespace for AMQ pods

        """

        # Namespace for amq
        try:
            self.create_namespace(namespace)
        except CommandFailed as ef:
            if f'project.project.openshift.io "{namespace}" already exists' not in str(
                ef
            ):
                raise ef

        # Create strimzi-cluster-operator pod
        run(
            f"for i in `(ls strimzi-kafka-operator/packaging/install/cluster-operator/)`;"
            f"do sed 's/{namespace}/myproject/g' "
            f"strimzi-kafka-operator/packaging/install/cluster-operator/$i;done",
            shell=True,
            check=True,
            cwd=self.dir,
        )
        self.strimzi_kafka_operator = os.path.join(self.dir, self.amq_dir)
        pf_files = os.listdir(self.strimzi_kafka_operator)
        crds = []
        for crd in pf_files:
            crds.append(crd)
        self.crd_objects = []
        for adm_yaml in crds:
            try:
                adm_data = templating.load_yaml(self.strimzi_kafka_operator + adm_yaml)
                adm_obj = OCS(**adm_data)
                adm_obj.create()
                self.crd_objects.append(adm_obj)
            except (CommandFailed, CalledProcessError) as cfe:
                if "Error is Error from server (AlreadyExists):" in str(cfe):
                    log.warn(
                        "Some amq leftovers are present, please cleanup the cluster"
                    )
                    pytest.skip(
                        "AMQ leftovers are present needs to cleanup the cluster"
                    )
        time.sleep(30)
        #  Check strimzi-cluster-operator pod created
        if self.is_amq_pod_running(pod_pattern="cluster-operator", expected_pods=1):
            log.info("strimzi-cluster-operator pod is in running state")
        else:
            raise ResourceWrongStatusException(
                "strimzi-cluster-operator pod is not getting to running state"
            )

    def is_amq_pod_running(
        self, pod_pattern, expected_pods, namespace=constants.AMQ_NAMESPACE
    ):
        """
        The function checks whether the provided pod_pattern finds the expected number of pods and whether they reach Running state

        Args:
            pod_pattern (str): the pattern for pod
            expected_pods (int): Number of pods
            namespace (str): Namespace for amq pods

        Returns:
            bool: status of pod: True if found pod is running

        """

        _rc = True

        for pod in TimeoutSampler(
            300, 10, get_pod_name_by_pattern, pod_pattern, namespace
        ):
            try:
                if pod is not None and len(pod) == expected_pods:
                    amq_pod = pod
                    break
            except IndexError as ie:
                log.error(" pod not ready yet")
                raise ie

        # checking pod status
        for pod in amq_pod:
            if self.pod_obj.wait_for_resource(
                condition="Running",
                resource_name=pod,
                timeout=1600,
                sleep=30,
            ):
                log.info(f"{pod} pod is up and running")
            else:
                _rc = False
                log.error(f"{pod} pod is not running")

        return _rc

    def setup_amq_kafka_persistent(self, sc_name, size=100, replicas=3):
        """
        Function to set up amq-kafka-persistent. The file is pulled from GitHub;
        it creates a resource of kind: Kafka and makes sure its pods are running.

        Args:
            sc_name (str): Name of sc
            size (int): Size of the storage in Gi
            replicas (int): Number of kafka and zookeeper pods to be created

        return : kafka_persistent

        """
        if storagecluster_independent_check():
            sc_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD
        try:
            kafka_persistent = templating.load_yaml(
                os.path.join(self.dir, self.amq_kafka_pers_yaml)
            )
            kafka_persistent["spec"]["kafka"]["replicas"] = replicas
            kafka_persistent["spec"]["kafka"]["storage"]["volumes"][0][
                "class"
            ] = sc_name
            kafka_persistent["spec"]["kafka"]["storage"]["volumes"][0][
                "size"
            ] = f"{size}Gi"

            kafka_persistent["spec"]["zookeeper"]["replicas"] = replicas
            kafka_persistent["spec"]["zookeeper"]["storage"]["class"] = sc_name
            kafka_persistent["spec"]["zookeeper"]["storage"]["size"] = f"{size}Gi"
            self.kafka_persistent = OCS(**kafka_persistent)
            self.kafka_persistent.create()

        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during setup of AMQ Kafka-persistent")
            raise cf
        time.sleep(40)

        if self.is_amq_pod_running(
            pod_pattern="my-cluster", expected_pods=(replicas * 2) + 1
        ):
            return self.kafka_persistent
        else:
            raise ResourceWrongStatusException(
                "my-cluster-kafka and my-cluster-zookeeper "
                "Pod is not getting to running state"
            )

    def setup_amq_kafka_connect(self):
        """
        Function to set up amq-kafka-connect. The yaml file is pulled from GitHub;
        it creates a resource of kind: KafkaConnect and makes sure the pod is running.

        Returns: kafka_connect object
        """
        try:
            kafka_connect = templating.load_yaml(
                os.path.join(self.dir, self.amq_kafka_connect_yaml)
            )
            self.kafka_connect = OCS(**kafka_connect)
            self.kafka_connect.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during setup of AMQ KafkaConnect")
            raise cf

        if self.is_amq_pod_running(
            pod_pattern="my-connect-cluster-connect", expected_pods=1
        ):
            return self.kafka_connect
        else:
            raise ResourceWrongStatusException(
                "my-connect-cluster-connect pod is not getting to running state"
            )

    def setup_amq_kafka_bridge(self):
        """
        Function to set up amq-kafka-bridge. The file is pulled from GitHub;
        it creates a resource of kind: KafkaBridge and makes sure the pod is running.

        Return: kafka_bridge object
        """
        try:
            kafka_bridge = templating.load_yaml(
                os.path.join(self.dir, self.amq_kafka_bridge_yaml)
            )
            self.kafka_bridge = OCS(**kafka_bridge)
            self.kafka_bridge.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during setup of AMQ KafkaConnect")
            raise cf
        # Making sure the kafka_bridge is running
        if self.is_amq_pod_running(pod_pattern="my-bridge-bridge", expected_pods=1):
            return self.kafka_bridge
        else:
            raise ResourceWrongStatusException(
                "kafka_bridge_pod pod is not getting to running state"
            )

    def create_kafka_topic(self, name="my-topic", partitions=1, replicas=1):
        """
        Creates kafka topic

        Args:
            name (str): Name of the kafka topic
            partitions (int): Number of partitions
            replicas (int): Number of replicas

        Return: kafka_topic object
        """
        try:
            kafka_topic = templating.load_yaml(
                os.path.join(self.dir, self.kafka_topic_yaml)
            )
            kafka_topic["metadata"]["name"] = name
            kafka_topic["spec"]["partitions"] = partitions
            kafka_topic["spec"]["replicas"] = replicas
            self.kafka_topic = OCS(**kafka_topic)
            self.kafka_topic.create()
        except (CommandFailed, CalledProcessError) as cf:
            if f'kafkatopics.kafka.strimzi.io "{name}" already exists' not in str(cf):
                log.error("Failed during creating of Kafka topic")
                raise cf

        # Making sure kafka topic created
        if self.kafka_topic_obj.get(resource_name=name):
            return self.kafka_topic
        else:
            raise ResourceWrongStatusException("kafka topic is not created")

    def create_kafka_user(self, name="my-user"):
        """
        Creates kafka user

        Args:
             name (str): Name of the kafka user

        Return: kafka_user object

        """
        try:
            kafka_user = templating.load_yaml(
                os.path.join(self.dir, self.kafka_user_yaml)
            )
            kafka_user["metadata"]["name"] = name
            self.kafka_user = OCS(**kafka_user)
            self.kafka_user.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during creating of Kafka user")
            raise cf

        # Making sure kafka user created
        if self.kafka_user_obj.get(resource_name=name):
            return self.kafka_user
        else:
            raise ResourceWrongStatusException("kafka user is not created")

    def create_producer_pod(self, num_of_pods=1, value="10000"):
        """
        Creates producer pods

        Args:
            num_of_pods (int): Number of producer pods to be created
            value (str): Number of the messages to be sent

        Returns: producer pod object

        """
        try:
            producer_pod = templating.load_yaml(constants.HELLO_WORLD_PRODUCER_YAML)
            producer_pod["spec"]["replicas"] = num_of_pods
            producer_pod["spec"]["template"]["spec"]["containers"][0]["env"][4][
                "value"
            ] = value
            self.producer_pod = OCS(**producer_pod)
            self.producer_pod.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during creation of producer pod")
            raise cf

        # Making sure the producer pod is running
        if self.is_amq_pod_running(
            pod_pattern="hello-world-producer", expected_pods=num_of_pods
        ):
            return self.producer_pod
        else:
            raise ResourceWrongStatusException(
                "producer pod is not getting to running state"
            )

    def create_consumer_pod(self, num_of_pods=1, value="10000"):
        """
        Creates consumer pods

        Args:
            num_of_pods (int): Number of consumer pods to be created
            value (str): Number of messages to be received

        Returns: consumer pod object

        """
        try:
            consumer_pod = templating.load_yaml(constants.HELLO_WORLD_CONSUMER_YAML)
            consumer_pod["spec"]["replicas"] = num_of_pods
            consumer_pod["spec"]["template"]["spec"]["containers"][0]["env"][4][
                "value"
            ] = value
            self.consumer_pod = OCS(**consumer_pod)
            self.consumer_pod.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during creation of consumer pod")
            raise cf

        # Making sure the consumer pod is running
        if self.is_amq_pod_running(
            pod_pattern="hello-world-consumer", expected_pods=num_of_pods
        ):
            return self.consumer_pod
        else:
            raise ResourceWrongStatusException(
                "consumer pod is not getting to running state"
            )

    def validate_msg(
        self, pod, namespace=constants.AMQ_NAMESPACE, value="10000", since_time=1800
    ):
        """
        Validate if messages are sent or received

        Args:
            pod (str): Name of the pod
            namespace (str): Namespace of the pod
            value (str): Number of messages that were sent
            since_time (int): Only inspect the last `since_time` seconds of logs

        Returns:
            bool : True if all messages are sent/received

        """
        cmd = f"oc logs -n {namespace} {pod} --since={since_time}s"
        msg = run_cmd(cmd)
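        # The hello-world pods apparently number messages from 0, so the last
        # message expected for value="10000" is "Hello world - 9999"; finding
        # that line in the log means the full batch went through.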
        substring = f"Hello world - {int(value) - 1}"
        if msg.find(substring) == -1:
            return False
        else:
            return True

    def validate_messages_are_produced(
        self, namespace=constants.AMQ_NAMESPACE, value="10000", since_time=1800
    ):
        """
        Validates if all messages are sent in producer pod

        Args:
            namespace (str): Namespace of the pod
            value (str): Number of messages that were sent
            since_time (int): Only inspect the last `since_time` seconds of logs

        Raises exception on failures

        """
        # ToDo: Support multiple topics and users
        producer_pod_objs = [
            get_pod_obj(pod)
            for pod in get_pod_name_by_pattern("hello-world-produce", namespace)
        ]
        for pod in producer_pod_objs:
            for msg in TimeoutSampler(
                900, 30, self.validate_msg, pod.name, namespace, value, since_time
            ):
                if msg:
                    break
        assert msg, "Few messages are not sent by producer"
        log.info("Producer sent all messages")

    def validate_messages_are_consumed(
        self, namespace=constants.AMQ_NAMESPACE, value="10000", since_time=1800
    ):
        """
        Validates if all messages are received in consumer pod

        Args:
            namespace (str): Namespace of the pod
            value (str): Number of messages that were received
            since_time (int): Only inspect the last `since_time` seconds of logs

        Raises exception on failures

        """
        # ToDo: Support multiple topics and users
        consumer_pod_objs = [
            get_pod_obj(pod)
            for pod in get_pod_name_by_pattern("hello-world-consumer", namespace)
        ]
        for pod in consumer_pod_objs:
            for msg in TimeoutSampler(
                900, 30, self.validate_msg, pod.name, namespace, value, since_time
            ):
                if msg:
                    break
        assert msg, "Consumer didn't receive all messages"
        log.info("Consumer received all messages")

    def run_in_bg(
        self, namespace=constants.AMQ_NAMESPACE, value="10000", since_time=1800
    ):
        """
        Validate messages are produced and consumed in bg

        Args:
            namespace (str): Namespace of the pod
            value (str): Number of messages to be sent and received
            since_time (int): Only inspect the last `since_time` seconds of logs

        """
        # Todo: Check for each messages sent and received
        log.info("Running open messages on pod in bg")
        threads = []

        executor = ThreadPoolExecutor(2)
        threads.append(
            executor.submit(
                self.validate_messages_are_produced, namespace, value, since_time
            )
        )
        threads.append(
            executor.submit(
                self.validate_messages_are_consumed, namespace, value, since_time
            )
        )

        return threads

    def run_amq_benchmark(
        self,
        benchmark_pod_name="benchmark",
        kafka_namespace=constants.AMQ_NAMESPACE,
        tiller_namespace=AMQ_BENCHMARK_NAMESPACE,
        num_of_clients=8,
        worker=None,
        timeout=1800,
        amq_workload_yaml=None,
        run_in_bg=False,
    ):
        """
        Run benchmark pod and get the results

        Args:
            benchmark_pod_name (str): Name of the benchmark pod
            kafka_namespace (str): Namespace where kafka cluster created
            tiller_namespace (str): Namespace where tiller pod needs to be created
            num_of_clients (int): Number of clients to be created
            worker (str) : Loads to create on workloads separated with commas
                e.g http://benchmark-worker-0.benchmark-worker:8080,
                http://benchmark-worker-1.benchmark-worker:8080
            timeout (int): Time to complete the run
            amq_workload_yaml (dict): Contains amq workloads information keys and values
                :name (str): Name of the workloads
                :topics (int): Number of topics created
                :partitions_per_topic (int): Number of partitions per topic
                :message_size (int): Message size
                :payload_file (str): Load to run on workload
                :subscriptions_per_topic (int): Number of subscriptions per topic
                :consumer_per_subscription (int): Number of consumers per subscription
                :producers_per_topic (int): Number of producers per topic
                :producer_rate (int): Producer rate
                :consumer_backlog_sizegb (int): Size of block in gb
                :test_duration_minutes (int): Time to run the workloads
            run_in_bg (bool): On true the workload will run in background

        Return:
            result (str/Thread obj): Returns benchmark run information if run_in_bg is False.
                Otherwise a thread of the amq workload execution

        """

        # Namespace for helm/tiller
        try:
            self.create_namespace(tiller_namespace)
        except CommandFailed as ef:
            if (
                f'project.project.openshift.io "{tiller_namespace}" already exists'
                not in str(ef)
            ):
                raise ef

        # Create rbac file
        try:
            sa_tiller = list(
                templating.load_yaml(constants.AMQ_RBAC_YAML, multi_document=True)
            )
            sa_tiller[0]["metadata"]["namespace"] = tiller_namespace
            sa_tiller[1]["subjects"][0]["namespace"] = tiller_namespace
            self.sa_tiller = OCS(**sa_tiller[0])
            self.crb_tiller = OCS(**sa_tiller[1])
            self.sa_tiller.create()
            self.crb_tiller.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during creation of service account tiller")
            raise cf

        # Install helm cli (version v2.16.1, as we need the tiller component)
        # And create tiller pods
        wget_cmd = f"wget -c --read-timeout=5 --tries=0 {URL}"
        untar_cmd = "tar -zxvf helm-v2.16.1-linux-amd64.tar.gz"
        tiller_cmd = (
            f"linux-amd64/helm init --tiller-namespace {tiller_namespace}"
            f" --service-account {tiller_namespace}"
        )
        exec_cmd(cmd=wget_cmd, cwd=self.dir)
        exec_cmd(cmd=untar_cmd, cwd=self.dir)
        exec_cmd(cmd=tiller_cmd, cwd=self.dir)

        # Validate tiller pod is running
        log.info("Waiting for 30s for tiller pod to come up")
        time.sleep(30)
        if self.is_amq_pod_running(
            pod_pattern="tiller", expected_pods=1, namespace=tiller_namespace
        ):
            log.info("Tiller pod is running")
        else:
            raise ResourceWrongStatusException("Tiller pod is not in running state")

        # Create benchmark pods
        log.info("Create benchmark pods")
        values = templating.load_yaml(constants.AMQ_BENCHMARK_VALUE_YAML)
        values["numWorkers"] = num_of_clients
        benchmark_cmd = (
            f"linux-amd64/helm install {constants.AMQ_BENCHMARK_POD_YAML}"
            f" --name {benchmark_pod_name} --tiller-namespace {tiller_namespace}"
        )
        exec_cmd(cmd=benchmark_cmd, cwd=self.dir)

        # Making sure the benchmark pod and clients are running
        if self.is_amq_pod_running(
            pod_pattern="benchmark",
            expected_pods=(1 + num_of_clients),
            namespace=tiller_namespace,
        ):
            log.info("All benchmark pod is up and running")
        else:
            raise ResourceWrongStatusException(
                "Benchmark pod is not getting to running state"
            )

        # Update commonConfig with kafka-bootstrap server details
        driver_kafka = templating.load_yaml(constants.AMQ_DRIVER_KAFKA_YAML)
        driver_kafka[
            "commonConfig"
        ] = f"bootstrap.servers=my-cluster-kafka-bootstrap.{kafka_namespace}.svc.cluster.local:9092"
        json_file = f"{self.dir}/driver_kafka"
        templating.dump_data_to_json(driver_kafka, json_file)
        cmd = f"cp {json_file} {benchmark_pod_name}-driver:/"
        self.pod_obj.exec_oc_cmd(cmd)

        # Update the workload yaml
        if not amq_workload_yaml:
            amq_workload_yaml = templating.load_yaml(constants.AMQ_WORKLOAD_YAML)
        yaml_file = f"{self.dir}/amq_workload.yaml"
        templating.dump_data_to_temp_yaml(amq_workload_yaml, yaml_file)
        cmd = f"cp {yaml_file} {benchmark_pod_name}-driver:/"
        self.pod_obj.exec_oc_cmd(cmd)

        self.benchmark = True

        # Run the benchmark
        if worker:
            cmd = f"bin/benchmark --drivers /driver_kafka --workers {worker} /amq_workload.yaml"
        else:
            cmd = "bin/benchmark --drivers /driver_kafka /amq_workload.yaml"
        log.info(f"Run benchmark and running command {cmd} inside the benchmark pod ")

        if run_in_bg:
            executor = ThreadPoolExecutor(1)
            result = executor.submit(
                self.run_amq_workload,
                cmd,
                benchmark_pod_name,
                tiller_namespace,
                timeout,
            )
            return result

        pod_obj = get_pod_obj(
            name=f"{benchmark_pod_name}-driver", namespace=tiller_namespace
        )
        result = pod_obj.exec_cmd_on_pod(
            command=cmd, out_yaml_format=False, timeout=timeout
        )

        return result

    def run_amq_workload(self, command, benchmark_pod_name, tiller_namespace, timeout):
        """
        Runs amq workload in bg

        Args:
             command (str): Command to run on pod
             benchmark_pod_name (str): Pod name
             tiller_namespace (str): Namespace of pod
             timeout (int): Time to complete the run

        Returns:
            result (str): Returns benchmark run information

        """
        pod_obj = get_pod_obj(
            name=f"{benchmark_pod_name}-driver", namespace=tiller_namespace
        )
        return pod_obj.exec_cmd_on_pod(
            command=command, out_yaml_format=False, timeout=timeout
        )

    def validate_amq_benchmark(
        self, result, amq_workload_yaml, benchmark_pod_name="benchmark"
    ):
        """
        Validates amq benchmark run

        Args:
            result (str): Benchmark run information
            amq_workload_yaml (dict): AMQ workload information
            benchmark_pod_name (str): Name of the benchmark pod

        Returns:
            res_dict (dict): Returns the dict output on success, Otherwise none

        """
        res_dict = {}
        res_dict["topic"] = amq_workload_yaml["topics"]
        res_dict["partitionsPerTopic"] = amq_workload_yaml["partitionsPerTopic"]
        res_dict["messageSize"] = amq_workload_yaml["messageSize"]
        res_dict["payloadFile"] = amq_workload_yaml["payloadFile"]
        res_dict["subscriptionsPerTopic"] = amq_workload_yaml["subscriptionsPerTopic"]
        res_dict["producersPerTopic"] = amq_workload_yaml["producersPerTopic"]
        res_dict["consumerPerSubscription"] = amq_workload_yaml[
            "consumerPerSubscription"
        ]
        res_dict["producerRate"] = amq_workload_yaml["producerRate"]

        # Validate amq benchmark is completed
        workload_json_file = None
        for part in result.split():
            if ".json" in part:
                workload_json_file = part

        if workload_json_file:
            cmd = f"rsync {benchmark_pod_name}-driver:{workload_json_file} {self.dir} -n {AMQ_BENCHMARK_NAMESPACE}"
            self.pod_obj.exec_oc_cmd(command=cmd, out_yaml_format=False)
            # Parse the json file
            with open(f"{self.dir}/{workload_json_file}") as json_file:
                data = json.load(json_file)
            res_dict["AvgpublishRate"] = sum(data.get("publishRate")) / len(
                data.get("publishRate")
            )
            res_dict["AvgConsumerRate"] = sum(data.get("consumeRate")) / len(
                data.get("consumeRate")
            )
            res_dict["AvgMsgBacklog"] = sum(data.get("backlog")) / len(
                data.get("backlog")
            )
            res_dict["publishLatencyAvg"] = sum(data.get("publishLatencyAvg")) / len(
                data.get("publishLatencyAvg")
            )
            res_dict["aggregatedPublishLatencyAvg"] = data.get(
                "aggregatedPublishLatencyAvg"
            )
            res_dict["aggregatedPublishLatency50pct"] = data.get(
                "aggregatedPublishLatency50pct"
            )
            res_dict["aggregatedPublishLatency75pct"] = data.get(
                "aggregatedPublishLatency75pct"
            )
            res_dict["aggregatedPublishLatency95pct"] = data.get(
                "aggregatedPublishLatency95pct"
            )
            res_dict["aggregatedPublishLatency99pct"] = data.get(
                "aggregatedPublishLatency99pct"
            )
            res_dict["aggregatedPublishLatency999pct"] = data.get(
                "aggregatedPublishLatency999pct"
            )
            res_dict["aggregatedPublishLatency9999pct"] = data.get(
                "aggregatedPublishLatency9999pct"
            )
            res_dict["aggregatedPublishLatencyMax"] = data.get(
                "aggregatedPublishLatencyMax"
            )
            res_dict["aggregatedEndToEndLatencyAvg"] = data.get(
                "aggregatedEndToEndLatencyAvg"
            )
            res_dict["aggregatedEndToEndLatency50pct"] = data.get(
                "aggregatedEndToEndLatency50pct"
            )
            res_dict["aggregatedEndToEndLatency75pct"] = data.get(
                "aggregatedEndToEndLatency75pct"
            )
            res_dict["aggregatedEndToEndLatency95pct"] = data.get(
                "aggregatedEndToEndLatency95pct"
            )
            res_dict["aggregatedEndToEndLatency99pct"] = data.get(
                "aggregatedEndToEndLatency99pct"
            )
            res_dict["aggregatedEndToEndLatency999pct"] = data.get(
                "aggregatedEndToEndLatency999pct"
            )
            res_dict["aggregatedEndToEndLatency9999pct"] = data.get(
                "aggregatedEndToEndLatency9999pct"
            )
            res_dict["aggregatedEndToEndLatencyMax"] = data.get(
                "aggregatedEndToEndLatencyMax"
            )
        else:
            log.error("Benchmark didn't run completely")
            return None

        amq_benchmark_pod_table = PrettyTable(["key", "value"])
        for key, val in res_dict.items():
            amq_benchmark_pod_table.add_row([key, val])
        log.info(f"\n{amq_benchmark_pod_table}\n")

        return res_dict

    def export_amq_output_to_gsheet(self, amq_output, sheet_name, sheet_index):
        """
        Collect amq data to google spreadsheet

        Args:
            amq_output (dict):  amq output in dict
            sheet_name (str): Name of the sheet
            sheet_index (int): Index of sheet

        """
        # Collect data and export to Google doc spreadsheet
        g_sheet = GoogleSpreadSheetAPI(sheet_name=sheet_name, sheet_index=sheet_index)
        log.info("Exporting amq data to google spreadsheet")

        headers_to_key = []
        values = []
        for key, val in amq_output.items():
            headers_to_key.append(key)
            values.append(val)

        # Update amq_result to gsheet
        g_sheet.insert_row(values, 2)
        g_sheet.insert_row(headers_to_key, 2)

        # Capturing versions(OCP, OCS and Ceph) and test run name
        g_sheet.insert_row(
            [
                f"ocp_version:{utils.get_cluster_version()}",
                f"ocs_build_number:{utils.get_ocs_build_number()}",
                f"ceph_version:{utils.get_ceph_version()}",
                f"test_run_name:{utils.get_testrun_name()}",
            ],
            2,
        )

    def create_messaging_on_amq(
        self,
        topic_name="my-topic",
        user_name="my-user",
        partitions=1,
        replicas=1,
        num_of_producer_pods=1,
        num_of_consumer_pods=1,
        value="10000",
    ):
        """
        Creates workload using Open Messaging tool on amq cluster

        Args:
            topic_name (str): Name of the topic to be created
            user_name (str): Name of the user to be created
            partitions (int): Number of partitions of topic
            replicas (int): Number of replicas of topic
            num_of_producer_pods (int): Number of producer pods to be created
            num_of_consumer_pods (int): Number of consumer pods to be created
            value (str): Number of messages to be sent and received

        """
        self.create_kafka_topic(topic_name, partitions, replicas)
        self.create_kafka_user(user_name)
        self.create_producer_pod(num_of_producer_pods, value)
        self.create_consumer_pod(num_of_consumer_pods, value)
        self.messaging = True

    def setup_amq_cluster(
        self, sc_name, namespace=constants.AMQ_NAMESPACE, size=100, replicas=3
    ):
        """
        Creates amq cluster with persistent storage.

        Args:
            sc_name (str): Name of sc
            namespace (str): Namespace for amq cluster
            size (int): Size of the storage
            replicas (int): Number of kafka and zookeeper pods to be created

        """
        if storagecluster_independent_check():
            sc_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD
        self.setup_amq_cluster_operator(namespace)
        self.setup_amq_kafka_persistent(sc_name, size, replicas)
        self.setup_amq_kafka_connect()
        self.setup_amq_kafka_bridge()
        self.amq_is_setup = True
        return self

    def create_kafkadrop(self, wait=True):
        """
        Create kafkadrop pod, service and routes

        Args:
            wait (bool): If true waits till kafkadrop pod running

        Return:
            tuple: Contains objects of kafkadrop pod, service and route

        """
        # Create kafkadrop pod
        try:
            kafkadrop = list(
                templating.load_yaml(constants.KAFKADROP_YAML, multi_document=True)
            )
            self.kafkadrop_pod = OCS(**kafkadrop[0])
            self.kafkadrop_svc = OCS(**kafkadrop[1])
            self.kafkadrop_route = OCS(**kafkadrop[2])
            self.kafkadrop_pod.create()
            self.kafkadrop_svc.create()
            self.kafkadrop_route.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during creation of kafkadrop which kafka UI")
            raise cf

        # Validate kafkadrop pod running
        if wait:
            ocp_obj = OCP(kind=constants.POD, namespace=constants.AMQ_NAMESPACE)
            ocp_obj.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector="app=kafdrop",
                timeout=120,
                sleep=5,
            )

        return self.kafkadrop_pod, self.kafkadrop_svc, self.kafkadrop_route

    def cleanup(
        self,
        kafka_namespace=constants.AMQ_NAMESPACE,
        tiller_namespace=AMQ_BENCHMARK_NAMESPACE,
    ):
        """
        Clean up function.
        Deletes the messaging and benchmark resources first, then the Kafka
        connect, bridge and persistent resources, then the cluster operator
        objects, and at the end the created namespaces.

        Args:
            kafka_namespace (str): Created namespace for amq
            tiller_namespace (str): Created namespace for benchmark

        """

        if self.consumer_pod:
            self.consumer_pod.delete()
        if self.producer_pod:
            self.producer_pod.delete()
        if self.kafka_user:
            self.kafka_user.delete()
        if self.kafka_topic:
            self.kafka_topic.delete()

        if self.benchmark:
            # Delete the helm app
            try:
                purge_cmd = f"linux-amd64/helm delete benchmark --purge --tiller-namespace {tiller_namespace}"
                run(purge_cmd, shell=True, cwd=self.dir, check=True)
            except (CommandFailed, CalledProcessError) as cf:
                log.error("Failed to delete help app")
                raise cf
            # Delete the pods and namespace created
            self.sa_tiller.delete()
            self.crb_tiller.delete()
            run_cmd(f"oc delete project {tiller_namespace}")
            self.ns_obj.wait_for_delete(resource_name=tiller_namespace)

        if self.kafka_connect:
            self.kafka_connect.delete()
        if self.kafka_bridge:
            self.kafka_bridge.delete()
        if self.kafka_persistent:
            self.kafka_persistent.delete()
            log.info("Waiting for 20 seconds to delete persistent")
            time.sleep(20)
            ocs_pvc_obj = get_all_pvc_objs(namespace=kafka_namespace)
            if ocs_pvc_obj:
                delete_pvcs(ocs_pvc_obj)
            for pvc in ocs_pvc_obj:
                logging.info(pvc.name)
                validate_pv_delete(pvc.backed_pv)

        if self.crd_objects:
            for adm_obj in self.crd_objects:
                adm_obj.delete()
        time.sleep(20)

        # Reset namespace to default
        switch_to_default_rook_cluster_project()
        run_cmd(f"oc delete project {kafka_namespace}")
        self.ns_obj.wait_for_delete(resource_name=kafka_namespace, timeout=90)
Beispiel #14
0
class Postgresql(RipSaw):
    """
    Postgresql workload operation
    """
    def __init__(self, **kwargs):
        """
        Initializer function

        """
        super().__init__(**kwargs)
        self._apply_crd(crd=RIPSAW_CRD)

    def _apply_crd(self, crd):
        """
        Apply the CRD

        Args:
            crd (str): yaml to apply

        """
        RipSaw.apply_crd(self, crd=crd)

    def setup_postgresql(self, replicas):
        """
        Deploy postgres sql server

        Args:
            replicas (int): Number of postgresql pods to be deployed

        Raises:
            CommandFailed: If PostgreSQL server setup fails

        """
        log.info("Deploying postgres database")
        try:
            pgsql_service = templating.load_yaml(constants.PGSQL_SERVICE_YAML)
            pgsql_cmap = templating.load_yaml(constants.PGSQL_CONFIGMAP_YAML)
            pgsql_sset = templating.load_yaml(constants.PGSQL_STATEFULSET_YAML)
            pgsql_sset["spec"]["replicas"] = replicas
            self.pgsql_service = OCS(**pgsql_service)
            self.pgsql_service.create()
            self.pgsql_cmap = OCS(**pgsql_cmap)
            self.pgsql_cmap.create()
            self.pgsql_sset = OCS(**pgsql_sset)
            self.pgsql_sset.create()
            self.pod_obj.wait_for_resource(
                condition="Running",
                selector="app=postgres",
                resource_count=replicas,
                timeout=3600,
            )
        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during setup of PostgreSQL server")
            raise cf
        self.pgsql_is_setup = True
        log.info("Successfully deployed postgres database")

    def create_pgbench_benchmark(
        self,
        replicas,
        pgbench_name=None,
        postgres_name=None,
        clients=None,
        threads=None,
        transactions=None,
        scaling_factor=None,
        timeout=None,
        wait=True,
    ):
        """
        Create pgbench benchmark pods

        Args:
            replicas (int): Number of pgbench pods to be deployed
            pgbench_name (str): Name of pgbench benchmark
            postgres_name (str): Name of postgres pod
            clients (int): Number of clients
            threads (int): Number of threads
            transactions (int): Number of transactions
            scaling_factor (int): scaling factor
            timeout (int): Time in seconds to wait
            wait (bool): On true waits till pgbench reaches Completed state

        Returns:
            List: pgbench pod objects list

        """
        pg_obj_list = []
        pgbench_name = pgbench_name if pgbench_name else "pgbench-benchmark"
        postgres_name = postgres_name if postgres_name else "postgres"
        for i in range(replicas):
            log.info("Create resource file for pgbench workload")
            pg_data = templating.load_yaml(constants.PGSQL_BENCHMARK_YAML)
            pg_data["metadata"]["name"] = f"{pgbench_name}" + f"{i}"
            pg_data["spec"]["workload"]["args"]["databases"][0]["host"] = (
                f"{postgres_name}-" + f"{i}" + ".postgres")

            if clients is not None:
                pg_data["spec"]["workload"]["args"]["clients"][0] = clients
            if threads is not None:
                pg_data["spec"]["workload"]["args"]["threads"] = threads
            if transactions is not None:
                pg_data["spec"]["workload"]["args"][
                    "transactions"] = transactions
            if scaling_factor is not None:
                pg_data["spec"]["workload"]["args"][
                    "scaling_factor"] = scaling_factor
            pg_obj = OCS(**pg_data)
            pg_obj_list.append(pg_obj)
            pg_obj.create()

        if wait:
            # Confirm that the expected pgbench pods are spun up
            log.info("Searching the pgbench pods by its name pattern")
            timeout = timeout if timeout else 300
            for pgbench_pods in TimeoutSampler(
                    timeout,
                    replicas,
                    get_pod_name_by_pattern,
                    "pgbench-1-dbs-client",
                    RIPSAW_NAMESPACE,
            ):
                try:
                    if len(pgbench_pods) == replicas:
                        log.info(f"Expected number of pgbench pods are "
                                 f"found: {replicas}")
                        break
                except IndexError:
                    log.info(f"Expected number of pgbench pods are {replicas} "
                             f"but only found {len(pgbench_pods)}")
        return pg_obj_list
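
    # A minimal, hypothetical usage sketch for this class (parameter values are
    # illustrative only):
    #   pgsql = Postgresql()
    #   pgsql.setup_postgresql(replicas=3)
    #   pgsql.create_pgbench_benchmark(replicas=3)
    #   pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)
    #   pgsql.validate_pgbench_run(pgsql.get_pgbench_pods())
    #   pgsql.cleanup()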

    def get_postgres_pvc(self):
        """
        Get all postgres pvc

        Returns:
             List: postgres pvc objects list
        """
        return get_all_pvc_objs(namespace=RIPSAW_NAMESPACE)

    def get_postgres_pods(self):
        """
        Get all postgres pods
        Returns:
            List: postgres pod objects list
        """
        return get_all_pods(namespace=RIPSAW_NAMESPACE, selector=["postgres"])

    def get_pgbench_pods(self):
        """
        Get all pgbench pods

        Returns:
            List: pgbench pod objects list

        """
        return [
            get_pod_obj(pod, RIPSAW_NAMESPACE)
            for pod in get_pod_name_by_pattern("pgbench", RIPSAW_NAMESPACE)
        ]

    def delete_pgbench_pods(self, pg_obj_list):
        """
        Delete all pgbench pods on cluster

        Returns:
            bool: True if deleted, False otherwise

        """
        log.info("Delete pgbench Benchmark")
        for pgbench_pod in pg_obj_list:
            pgbench_pod.delete(force=True)

    def is_pgbench_running(self):
        """
        Check if pgbench is running

        Returns:
            bool: True if pgbench is running; False otherwise

        """
        pod_objs = self.get_pgbench_pods()
        for pod in pod_objs:
            # The container state is a dict keyed by the state name
            # (e.g. {"running": {...}}), so check its key instead of
            # comparing the dict to a string
            state = pod.get().get("status").get("containerStatuses")[0].get(
                "state")
            if list(state.keys())[0] == "running":
                log.info("One or more pgbench pods are in running state")
                return True
        return False

    def get_pgbench_status(self, pgbench_pod_name):
        """
        Get pgbench status

        Args:
            pgbench_pod_name (str): Name of the pgbench pod

        Returns:
            str: state of pgbench pod (running/completed)

        """
        pod_obj = get_pod_obj(pgbench_pod_name, namespace=RIPSAW_NAMESPACE)
        status = pod_obj.get().get("status").get("containerStatuses")[0].get(
            "state")

        return ("running" if list(status.keys())[0] == "running" else
                status["terminated"]["reason"])

    def wait_for_postgres_status(self,
                                 status=constants.STATUS_RUNNING,
                                 timeout=300):
        """
        Wait for postgres pods status to reach running/completed

        Args:
            status (str): status to reach Running or Completed
            timeout (int): Time in seconds to wait

        """
        log.info(f"Waiting for postgres pods to be reach {status} state")
        postgres_pod_objs = self.get_postgres_pods()
        for postgres_pod_obj in postgres_pod_objs:
            wait_for_resource_state(resource=postgres_pod_obj,
                                    state=status,
                                    timeout=timeout)

    def wait_for_pgbench_status(self, status, timeout=None):
        """
        Wait for pgbench benchmark pods status to reach running/completed

        Args:
            status (str): status to reach Running or Completed
            timeout (int): Time in seconds to wait

        """
        """
        Sometimes with the default values in the benchmark yaml the pgbench pod is not
        getting completed within the specified time and the tests are failing.
        I think it is varying with the infrastructure.
        So, for now we set the timeout to 30 mins and will start monitoring each pg bench
        pods for each run.Based on the results we will define the timeout again
        """
        timeout = timeout if timeout else 1800
        # Wait for pg_bench pods to initialized and running
        log.info(f"Waiting for pgbench pods to be reach {status} state")
        pgbench_pod_objs = self.get_pgbench_pods()
        for pgbench_pod_obj in pgbench_pod_objs:
            try:
                wait_for_resource_state(resource=pgbench_pod_obj,
                                        state=status,
                                        timeout=timeout)
            except ResourceWrongStatusException:
                output = run_cmd(f"oc logs {pgbench_pod_obj.name}")
                error_msg = f"{pgbench_pod_obj.name} did not reach to {status} state after {timeout} sec\n{output}"
                log.error(error_msg)
                raise UnexpectedBehaviour(error_msg)

    def validate_pgbench_run(self, pgbench_pods, print_table=True):
        """
        Validate pgbench run

        Args:
            pgbench_pods (list): List of pgbench pods
            print_table (bool): Print results in a table

        Returns:
            list: pgbench outputs in list

        """
        all_pgbench_pods_output = []
        for pgbench_pod in pgbench_pods:
            log.info(f"pgbench_client_pod===={pgbench_pod.name}====")
            output = run_cmd(
                f"oc logs {pgbench_pod.name} -n {RIPSAW_NAMESPACE}")
            pg_output = utils.parse_pgsql_logs(output)
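            # parse_pgsql_logs() is expected to return a list of per-run dicts,
            # each keyed by a run id and holding metrics such as latency_avg,
            # lat_stddev, tps_incl and tps_excl, which are consumed below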
            log.info("*******PGBench output log*********\n" f"{pg_output}")
            # for data in all_pgbench_pods_output:
            for data in pg_output:
                run_id = list(data.keys())
                latency_avg = data[run_id[0]]["latency_avg"]
                if not latency_avg:
                    raise UnexpectedBehaviour("PGBench failed to run, "
                                              "no data found on latency_avg")
            log.info(f"PGBench on {pgbench_pod.name} completed successfully")
            all_pgbench_pods_output.append((pg_output, pgbench_pod.name))

        if print_table:
            pgbench_pod_table = PrettyTable()
            pgbench_pod_table.field_names = [
                "pod_name",
                "scaling_factor",
                "num_clients",
                "num_threads",
                "trans_client",
                "actually_trans",
                "latency_avg",
                "lat_stddev",
                "tps_incl",
                "tps_excl",
            ]
            for pgbench_pod_out in all_pgbench_pods_output:
                for pod_output in pgbench_pod_out[0]:
                    for pod in pod_output.values():
                        pgbench_pod_table.add_row([
                            pgbench_pod_out[1],
                            pod["scaling_factor"],
                            pod["num_clients"],
                            pod["num_threads"],
                            pod["number_of_transactions_per_client"],
                            pod["number_of_transactions_actually_processed"],
                            pod["latency_avg"],
                            pod["lat_stddev"],
                            pod["tps_incl"],
                            pod["tps_excl"],
                        ])
            log.info(f"\n{pgbench_pod_table}\n")

        return all_pgbench_pods_output

    def get_pgsql_nodes(self):
        """
        Get nodes that contain a pgsql app pod

        Returns:
            list: Cluster node OCP objects

        """
        pgsql_pod_objs = self.pod_obj.get(selector=constants.PGSQL_APP_LABEL,
                                          all_namespaces=True)
        log.info("Create a list of nodes that contain a pgsql app pod")
        nodes_set = set()
        for pod in pgsql_pod_objs["items"]:
            log.info(f"pod {pod['metadata']['name']} located on "
                     f"node {pod['spec']['nodeName']}")
            nodes_set.add(pod["spec"]["nodeName"])
        return list(nodes_set)

    def respin_pgsql_app_pod(self):
        """
        Respin the pgsql app pod

        Returns:
            pod status

        """
        app_pod_list = get_operator_pods(constants.PGSQL_APP_LABEL,
                                         constants.RIPSAW_NAMESPACE)
        app_pod = app_pod_list[random.randint(0, len(app_pod_list) - 1)]
        log.info(f"respin pod {app_pod.name}")
        app_pod.delete(wait=True, force=False)
        wait_for_resource_state(resource=app_pod,
                                state=constants.STATUS_RUNNING,
                                timeout=300)

    def get_pgbech_pod_status_table(self, pgbench_pods):
        """
        Get pgbench pod data and print results on a table

        Args:
            pgbench_pods (list): List of pgbench pods

        """
        pgbench_pod_table = PrettyTable()
        pgbench_pod_table.field_names = [
            "pod_name",
            "scaling_factor",
            "num_clients",
            "num_threads",
            "trans_client",
            "actually_trans",
            "latency_avg",
            "lat_stddev",
            "tps_incl",
            "tps_excl",
        ]
        for pgbench_pod in pgbench_pods:
            output = run_cmd(f"oc logs {pgbench_pod.name}")
            pg_output = utils.parse_pgsql_logs(output)
            for pod_output in pg_output:
                for pod in pod_output.values():
                    pgbench_pod_table.add_row([
                        pgbench_pod.name,
                        pod["scaling_factor"],
                        pod["num_clients"],
                        pod["num_threads"],
                        pod["number_of_transactions_per_client"],
                        pod["number_of_transactions_actually_processed"],
                        pod["latency_avg"],
                        pod["lat_stddev"],
                        pod["tps_incl"],
                        pod["tps_excl"],
                    ])
        log.info(f"\n{pgbench_pod_table}\n")

    def export_pgoutput_to_googlesheet(self, pg_output, sheet_name,
                                       sheet_index):
        """
        Collect pgbench output to google spreadsheet

        Args:
            pg_output (list):  pgbench outputs in list
            sheet_name (str): Name of the sheet
            sheet_index (int): Index of sheet

        """
        # Collect data and export to Google doc spreadsheet
        g_sheet = GoogleSpreadSheetAPI(sheet_name=sheet_name,
                                       sheet_index=sheet_index)
        log.info("Exporting pgoutput data to google spreadsheet")
        for pgbench_pod in range(len(pg_output)):
            for run in range(len(pg_output[pgbench_pod][0])):
                run_id = list(pg_output[pgbench_pod][0][run].keys())[0]
                lat_avg = pg_output[pgbench_pod][0][run][run_id]["latency_avg"]
                lat_stddev = pg_output[pgbench_pod][0][run][run_id][
                    "lat_stddev"]
                tps_incl = pg_output[pgbench_pod][0][run][run_id]["lat_stddev"]
                tps_excl = pg_output[pgbench_pod][0][run][run_id]["tps_excl"]
                g_sheet.insert_row(
                    [
                        f"Pgbench-pod{pg_output[pgbench_pod][1]}-run-{run_id}",
                        int(lat_avg),
                        int(lat_stddev),
                        int(tps_incl),
                        int(tps_excl),
                    ],
                    2,
                )
        g_sheet.insert_row(
            ["", "latency_avg", "lat_stddev", "lat_stddev", "tps_excl"], 2)

        # Capturing versions (OCP, OCS and Ceph) and test run name
        g_sheet.insert_row(
            [
                f"ocp_version:{utils.get_cluster_version()}",
                f"ocs_build_number:{utils.get_ocs_build_number()}",
                f"ceph_version:{utils.get_ceph_version()}",
                f"test_run_name:{utils.get_testrun_name()}",
            ],
            2,
        )

    def cleanup(self):
        """
        Clean up

        """
        log.info("Deleting postgres pods and configuration")
        if self.pgsql_is_setup:
            self.pgsql_sset.delete()
            self.pgsql_cmap.delete()
            self.pgsql_service.delete()
        log.info("Deleting pgbench pods")
        pods_obj = self.get_pgbench_pods()
        for pod in pods_obj:
            pod.delete()
            pod.ocp.wait_for_delete(pod.name)
        log.info("Deleting ripsaw configuration")
        RipSaw.cleanup(self)

    def attach_pgsql_pod_to_claim_pvc(self,
                                      pvc_objs,
                                      postgres_name,
                                      run_benchmark=True,
                                      pgbench_name=None):
        """
        Attaches pgsql pod to created claim PVC

        Args:
            pvc_objs (list): List of PVC objs which needs to attached to pod
            postgres_name (str): Name of the postgres pod
            run_benchmark (bool): On true, runs pgbench benchmark on postgres pod
            pgbench_name (str): Name of pgbench benchmark

        Returns:
            pgsql_obj_list (list): List of pod objs created

        """
        pgsql_obj_list = []
        for pvc_obj in pvc_objs:
            try:
                pgsql_sset = templating.load_yaml(
                    constants.PGSQL_STATEFULSET_YAML)
                del pgsql_sset["spec"]["volumeClaimTemplates"]
                pgsql_sset["metadata"]["name"] = (f"{postgres_name}" +
                                                  f"{pvc_objs.index(pvc_obj)}")
                pgsql_sset["spec"]["template"]["spec"]["containers"][0][
                    "volumeMounts"][0]["name"] = pvc_obj.name
                pgsql_sset["spec"]["template"]["spec"]["volumes"] = [{
                    "name": f"{pvc_obj.name}",
                    "persistentVolumeClaim": {
                        "claimName": f"{pvc_obj.name}"
                    },
                }]
                pgsql_sset = OCS(**pgsql_sset)
                pgsql_sset.create()
                pgsql_obj_list.append(pgsql_sset)

                self.wait_for_postgres_status(status=constants.STATUS_RUNNING,
                                              timeout=300)

                if run_benchmark:
                    pg_data = templating.load_yaml(
                        constants.PGSQL_BENCHMARK_YAML)
                    pg_data["metadata"]["name"] = (
                        f"{pgbench_name}" +
                        f"{pvc_objs.index(pvc_obj)}" if pgbench_name else
                        create_unique_resource_name("benchmark", "pgbench"))
                    pg_data["spec"]["workload"]["args"]["databases"][0][
                        "host"] = (f"{postgres_name}" +
                                   f"{pvc_objs.index(pvc_obj)}-0" +
                                   ".postgres")
                    pg_obj = OCS(**pg_data)
                    pg_obj.create()
                    pgsql_obj_list.append(pg_obj)

                    wait_time = 120
                    log.info(f"Wait {wait_time} seconds before mounting pod")
                    time.sleep(wait_time)

            except (CommandFailed, CalledProcessError) as cf:
                log.error("Failed during creation of postgres pod")
                raise cf

        if run_benchmark:
            log.info("Checking all pgbench benchmark reached Completed state")
            self.wait_for_pgbench_status(status=constants.STATUS_COMPLETED,
                                         timeout=1800)

        return pgsql_obj_list
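
    # Hypothetical example of reusing pre-created PVCs with the helper above
    # (pvc_objs would typically come from a PVC factory fixture):
    #   pods = pgsql.attach_pgsql_pod_to_claim_pvc(
    #       pvc_objs, postgres_name="postgres", run_benchmark=True)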

    def get_postgres_used_file_space(self, pod_obj_list):
        """
        Get the used file space on a mount point

        Args:
            pod_obj_list (list): List of pod objects

        Returns:
            list: List of pod objects with the used file space recorded

        """
        # Get the used file space on a mount point
        for pod_obj in pod_obj_list:
            filepath = get_file_path(pod_obj, "pgdata")
            filespace = pod_obj.exec_cmd_on_pod(command=f"du -sh {filepath}",
                                                out_yaml_format=False)
            filespace = filespace.split()[0]
            pod_obj.filespace = filespace
        return pod_obj_list
Beispiel #15
0
    def test_fio_workload_simple(self, ripsaw, es, interface, io_pattern):
        """
        This is a basic fio perf test

        """

        # Deploy the ripsaw operator
        log.info("Deploying ripsaw operator")
        ripsaw.apply_crd("resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")
        if interface == "CephBlockPool":
            sc = constants.CEPHBLOCKPOOL_SC
        else:
            sc = constants.CEPHFILESYSTEM_SC

        # Create fio benchmark
        log.info("Create resource file for fio workload")
        fio_cr = templating.load_yaml(constants.FIO_CR_YAML)

        # Saving the original elastic-search IP and PORT - if defined in yaml
        if "elasticsearch" in fio_cr["spec"]:
            backup_es = fio_cr["spec"]["elasticsearch"]
        else:
            log.warning(
                "Elastic Search information does not exist in YAML file")
            # Keep an empty backup so the restore later on does not fail
            backup_es = {}
            fio_cr["spec"]["elasticsearch"] = {}

        # Use the internally defined elastic-search server in the test - if it exists
        if es:
            fio_cr["spec"]["elasticsearch"] = {
                "server": es.get_ip(),
                "port": es.get_port(),
            }

        # Setting the data set to 40% of the total storage capacity
        ceph_cluster = CephCluster()
        ceph_capacity = ceph_cluster.get_ceph_capacity()
        total_data_set = int(ceph_capacity * 0.4)
        filesize = int(fio_cr["spec"]["workload"]["args"]["filesize"].replace(
            "GiB", ""))
        # To make sure the number of app pods will not be more than 50, in case
        # of a large data set, change the size of the file each pod will work on
        if total_data_set > 500:
            filesize = int(ceph_capacity * 0.008)
            fio_cr["spec"]["workload"]["args"]["filesize"] = f"{filesize}GiB"
            # make sure that the storage size is larger than the file size
            fio_cr["spec"]["workload"]["args"][
                "storagesize"] = f"{int(filesize * 1.2)}Gi"
        fio_cr["spec"]["workload"]["args"]["servers"] = int(total_data_set /
                                                            filesize)
        log.info(f"Total Data set to work on is : {total_data_set} GiB")

        environment = get_environment_info()
        if not environment["user"] == "":
            fio_cr["spec"]["test_user"] = environment["user"]
        fio_cr["spec"]["clustername"] = environment["clustername"]

        log.debug(f"Environment information is : {environment}")

        fio_cr["spec"]["workload"]["args"]["storageclass"] = sc
        if io_pattern == "sequential":
            fio_cr["spec"]["workload"]["args"]["jobs"] = ["write", "read"]
            fio_cr["spec"]["workload"]["args"]["iodepth"] = 1
        log.info(f"The FIO CR file is {fio_cr}")
        fio_cr_obj = OCS(**fio_cr)
        fio_cr_obj.create()

        # Wait for fio client pod to be created
        for fio_pod in TimeoutSampler(300, 20, get_pod_name_by_pattern,
                                      "fio-client",
                                      constants.RIPSAW_NAMESPACE):
            try:
                if fio_pod[0] is not None:
                    fio_client_pod = fio_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        # Getting the start time of the test
        start_time = time.strftime("%Y-%m-%dT%H:%M:%SGMT", time.gmtime())

        # Getting the UUID from inside the benchmark pod
        uuid = ripsaw.get_uuid(fio_client_pod)
        # Setting back the original elastic-search information
        fio_cr["spec"]["elasticsearch"] = backup_es

        full_results = FIOResultsAnalyse(uuid, fio_cr)

        # Initialize the results doc file.
        for key in environment:
            full_results.add_key(key, environment[key])

        # Setting the global parameters of the test
        full_results.add_key("io_pattern", io_pattern)
        full_results.add_key("dataset", f"{total_data_set}GiB")
        full_results.add_key("file_size",
                             fio_cr["spec"]["workload"]["args"]["filesize"])
        full_results.add_key("servers",
                             fio_cr["spec"]["workload"]["args"]["servers"])
        full_results.add_key("samples",
                             fio_cr["spec"]["workload"]["args"]["samples"])
        full_results.add_key("operations",
                             fio_cr["spec"]["workload"]["args"]["jobs"])
        full_results.add_key("block_sizes",
                             fio_cr["spec"]["workload"]["args"]["bs"])
        full_results.add_key("io_depth",
                             fio_cr["spec"]["workload"]["args"]["iodepth"])
        full_results.add_key("jobs",
                             fio_cr["spec"]["workload"]["args"]["numjobs"])
        full_results.add_key(
            "runtime",
            {
                "read": fio_cr["spec"]["workload"]["args"]["read_runtime"],
                "write": fio_cr["spec"]["workload"]["args"]["write_runtime"],
            },
        )
        full_results.add_key(
            "storageclass", fio_cr["spec"]["workload"]["args"]["storageclass"])
        full_results.add_key("vol_size",
                             fio_cr["spec"]["workload"]["args"]["storagesize"])

        # Wait for fio pod to initialized and complete
        log.info("Waiting for fio_client to complete")
        pod_obj = OCP(kind="pod")
        pod_obj.wait_for_resource(
            condition="Completed",
            resource_name=fio_client_pod,
            timeout=18000,
            sleep=300,
        )

        # Getting the end time of the test
        end_time = time.strftime("%Y-%m-%dT%H:%M:%SGMT", time.gmtime())
        full_results.add_key("test_time", {
            "start": start_time,
            "end": end_time
        })

        output = run_cmd(f"oc logs {fio_client_pod}")
        log.info(f"The Test log is : {output}")

        if "Fio failed to execute" not in output:
            log.info("FIO has completed successfully")
        else:
            log.warning("FIO failed to complete")

        # Clean up fio benchmark
        log.info("Deleting FIO benchmark")
        fio_cr_obj.delete()

        log.debug(f"Full results is : {full_results.results}")

        # If an internal ES exists, copy all data from the internal to the main ES
        if es:
            log.info("Copy all data from Internal ES to Main ES")
            es._copy(full_results.es)
        # Adding this sleep between the copy and the analyzing of the results
        # since sometimes the results of the read (just after write) are empty
        time.sleep(30)
        full_results.analyze_results()  # Analyze the results
        # Writing the analyzed test results to the Elastic-Search server
        full_results.es_write()
        full_results.codespeed_push()  # Push results to codespeed
        # Creating full link to the results on the ES server
        log.info(f"The Result can be found at ; {full_results.results_link()}")
    def test_pvc_snapshot_performance_multiple_files(self, ripsaw, file_size,
                                                     files, threads,
                                                     interface):
        """
        Run the SmallFile workload and then take a snapshot.
        The test runs with 1M, 2M and 4M files on the volume - the total data
        set is the same for all tests, ~30GiB - then takes a snapshot and
        measures the time it takes.
        The test runs 3 times to check consistency.

        Args:
            ripsaw : benchmark operator fixture which will run the workload
            file_size (int): the size of the file to be create - in KiB
            files (int): number of files each thread will create
            threads (int): number of threads will be used in the workload
            interface (str): the volume interface that will be used
                             CephBlockPool / CephFileSystem

        Raises:
            TimeoutError: in case file creation takes too long
                          (more than 2 hours)

        """

        # Loading the main template yaml file for the benchmark and update some
        # fields with new values
        sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        if interface == constants.CEPHBLOCKPOOL:
            storageclass = constants.DEFAULT_STORAGECLASS_RBD
        else:
            storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
        log.info(f"Using {storageclass} Storageclass")

        # Setting up the parameters for this test
        sf_data["spec"]["workload"]["args"]["samples"] = 1
        sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
        sf_data["spec"]["workload"]["args"]["file_size"] = file_size
        sf_data["spec"]["workload"]["args"]["files"] = files
        sf_data["spec"]["workload"]["args"]["threads"] = threads
        sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass
        del sf_data["spec"]["elasticsearch"]
        """
        Calculate the size of the volume to be tested: it should be at least
        twice the size of the files, and at least 100Gi.

        Since file_size is in KiB and vol_size needs to be in GiB, more
        calculation is needed.
        """
        total_files = int(files * threads)
        total_data = int(files * threads * file_size / constants.GB2KB)
        data_set = int(total_data * 3)  # calculate data with replica
        vol_size = data_set if data_set >= 100 else 100
        sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

        environment = get_environment_info()
        if not environment["user"] == "":
            sf_data["spec"]["test_user"] = environment["user"]
        else:
            # since the full results object needs this parameter, initialize it from the CR file
            environment["user"] = sf_data["spec"]["test_user"]

        sf_data["spec"]["clustername"] = environment["clustername"]
        log.debug(f"The smallfile yaml file is {sf_data}")

        # Deploy the ripsaw operator
        log.info("Apply Operator CRD")
        ripsaw.apply_crd("resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")

        all_results = []

        for test_num in range(self.tests_numbers):

            # deploy the smallfile workload
            log.info("Running SmallFile bench")
            sf_obj = OCS(**sf_data)
            sf_obj.create()

            # wait for benchmark pods to get created - takes a while
            for bench_pod in TimeoutSampler(
                    240,
                    10,
                    get_pod_name_by_pattern,
                    "smallfile-client",
                    constants.RIPSAW_NAMESPACE,
            ):
                try:
                    if bench_pod[0] is not None:
                        small_file_client_pod = bench_pod[0]
                        break
                except IndexError:
                    log.info("Bench pod not ready yet")

            bench_pod = OCP(kind="pod", namespace=constants.RIPSAW_NAMESPACE)
            log.info("Waiting for SmallFile benchmark to Run")
            assert bench_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=small_file_client_pod,
                sleep=30,
                timeout=600,
            )
            for item in bench_pod.get()["items"][1]["spec"]["volumes"]:
                if "persistentVolumeClaim" in item:
                    pvc_name = item["persistentVolumeClaim"]["claimName"]
                    break
            log.info(f"Benchmark PVC name is : {pvc_name}")
            # Creation of 4M files on CephFS can take a lot of time
            timeout = 7200
            while timeout >= 0:
                logs = bench_pod.get_logs(name=small_file_client_pod)
                if "RUN STATUS DONE" in logs:
                    break
                timeout -= 30
                if timeout == 0:
                    raise TimeoutError(
                        "Timed out waiting for benchmark to complete")
                time.sleep(30)
            log.info(f"Smallfile test ({test_num + 1}) finished.")
            snap_name = pvc_name.replace("claim", "snapshot-")
            log.info(f"Taking snapshot of the PVC {pvc_name}")
            log.info(f"Snapshot name : {snap_name}")
            creation_time = self.measure_create_snapshot_time(
                pvc_name=pvc_name, snap_name=snap_name, interface=interface)
            log.info(f"Snapshot creation time is {creation_time} seconds")
            all_results.append(creation_time)

            # Delete the smallfile workload
            log.info("Deleting the smallfile workload")
            if sf_obj.delete(wait=True):
                log.info("The smallfile workload was deleted successfully")

            # Delete VolumeSnapshots
            log.info("Deleting the snapshots")
            if self.snap_obj.delete(wait=True):
                log.info("The snapshot deleted successfully")
            log.info("Verify (and wait if needed) that ceph health is OK")
            ceph_health_check(tries=45, delay=60)

        log.info(f"Full test report for {interface}:")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All results are {all_results}")
        log.info(
            f"The average creation time is : {statistics.mean(all_results)}")
        log.info(f"Number of Files on the volume : {total_files:,}, "
                 f"Total dataset : {int(data_set / 3)} GiB")
Beispiel #17
0
class CouchBase(PillowFight):
    """
    CouchBase workload operation
    """
    def __init__(self, **kwargs):
        """
        Initializer function

        """
        super().__init__(**kwargs)
        self.args = kwargs
        self.pod_obj = OCP(kind="pod")
        self.ns_obj = OCP(kind="namespace")
        self.couchbase_pod = OCP(kind="pod")
        self.create_namespace(namespace=constants.COUCHBASE_OPERATOR)
        self.cb_create_cb_secret = False
        self.cb_create_cb_cluster = False
        self.cb_create_bucket = False

    def create_namespace(self, namespace):
        """
        create namespace for couchbase

        Args:
            namespace (str): Namespace for deploying couchbase pods

        """
        try:
            self.ns_obj.new_project(namespace)
        except CommandFailed as ef:
            log.info("Already present")
            if f'project.project.openshift.io "{namespace}" already exists' not in str(
                    ef):
                raise ef

    def couchbase_operatorgroup(self):
        """
        Creates an operator group for Couchbase

        """
        operatorgroup_yaml = templating.load_yaml(
            constants.COUCHBASE_OPERATOR_GROUP_YAML)
        self.operatorgroup_yaml = OCS(**operatorgroup_yaml)
        self.operatorgroup_yaml.create()

    def couchbase_subscription(self):
        """
        Creates subscription for Couchbase operator

        """
        # Create an operator group for Couchbase
        log.info("Creating operator group for couchbase")
        self.couchbase_operatorgroup()
        subscription_yaml = templating.load_yaml(
            constants.COUCHBASE_OPERATOR_SUBSCRIPTION_YAML)
        self.subscription_yaml = OCS(**subscription_yaml)
        self.subscription_yaml.create()

        # Wait for the CSV to reach succeeded state
        cb_csv = self.get_couchbase_csv()
        cb_csv_obj = CSV(resource_name=cb_csv,
                         namespace=constants.COUCHBASE_OPERATOR)
        cb_csv_obj.wait_for_phase("Succeeded", timeout=720)

    def get_couchbase_csv(self):
        """ "
        Get the Couchbase CSV object

        Returns:
            CSV: Couchbase CSV object

        Raises:
            CSVNotFound: In case no CSV found.

        """
        cb_package_manifest = PackageManifest(
            resource_name="couchbase-enterprise-certified")
        cb_enter_csv = cb_package_manifest.get_current_csv(
            channel="stable", csv_pattern=constants.COUCHBASE_CSV_PREFIX)
        return cb_enter_csv

    def create_cb_secrets(self):
        """ "
        Create secrets for running Couchbase workers

        """
        cb_secrets = templating.load_yaml(constants.COUCHBASE_WORKER_SECRET)
        self.cb_secrets = OCS(**cb_secrets)
        self.cb_secrets.create()
        log.info("Successfully created secrets for Couchbase")
        self.cb_create_cb_secret = True

    def create_cb_cluster(self, replicas=1, sc_name=None):
        """
        Deploy a Couchbase server using Couchbase operator

        Once the couchbase operator is running, we need to wait for the
        worker pods to be up.  Once the Couchbase worker pods are up, pillowfight
        task is started.

        After the pillowfight task has finished, the log is collected and
        analyzed.

        Raises:
            Exception: If pillowfight results indicate that a minimum performance
                level is not reached (1 second response time, less than 1000 ops
                per second)

        """
        log.info("Creating Couchbase worker pods...")
        cb_example = templating.load_yaml(constants.COUCHBASE_WORKER_EXAMPLE)

        if storagecluster_independent_check():
            cb_example["spec"]["volumeClaimTemplates"][0]["spec"][
                "storageClassName"] = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD
        cb_example["spec"]["servers"][0]["size"] = replicas
        if sc_name:
            cb_example["spec"]["volumeClaimTemplates"][0]["spec"][
                "storageClassName"] = sc_name
        self.cb_example = OCS(**cb_example)
        self.cb_example.create()
        self.cb_create_cb_cluster = True

        # Wait for the Couchbase workers to be running.

        log.info("Waiting for the Couchbase pods to be Running")
        self.pod_obj.wait_for_resource(
            condition="Running",
            selector="app=couchbase",
            resource_count=replicas,
            timeout=900,
        )
        log.info(
            f"Expected number: {replicas} of couchbase workers reached running state"
        )

    def create_data_buckets(self):
        """
        Create data buckets

        """
        cb_bucket = templating.load_yaml(constants.COUCHBASE_DATA_BUCKET)
        self.cb_bucket = OCS(**cb_bucket)
        self.cb_bucket.create()
        log.info("Successfully created data buckets")
        self.cb_create_bucket = True

    def run_workload(self,
                     replicas,
                     num_items=None,
                     num_threads=None,
                     run_in_bg=False):
        """
        Running workload with pillow fight operator

        Args:
            replicas (int): Number of pods
            num_items (int): Number of items to be loaded to the cluster
            num_threads (int): Number of threads
            run_in_bg (bool) : Optional run IOs in background

        """
        self.result = None
        log.info("Running IOs using Pillow-fight")
        if run_in_bg:
            executor = ThreadPoolExecutor(1)
            self.result = executor.submit(
                PillowFight.run_pillowfights,
                self,
                replicas=replicas,
                num_items=num_items,
                num_threads=num_threads,
            )
            return self.result
        PillowFight.run_pillowfights(self,
                                     replicas=replicas,
                                     num_items=num_items,
                                     num_threads=num_threads)
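
    # A minimal, hypothetical driving sequence for this class (values are
    # illustrative only):
    #   cb = CouchBase()
    #   cb.couchbase_subscription()
    #   cb.create_cb_secrets()
    #   cb.create_cb_cluster(replicas=3)
    #   cb.create_data_buckets()
    #   cb.run_workload(replicas=3)   # with run_in_bg=True a Future is returned
    #   cb.analyze_run()
    #   cb.teardown()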

    def analyze_run(self, skip_analyze=False):
        """
        Analyzing the workload run logs

        Args:
            skip_analyze (bool): Option to skip logs analysis

        """
        if not skip_analyze:
            log.info("Analyzing  workload run logs..")
            PillowFight.analyze_all(self)

    def respin_couchbase_app_pod(self):
        """
        Respin the couchbase app pod

        Returns:
            pod status

        """
        app_pod_list = get_pod_name_by_pattern("cb-example",
                                               constants.COUCHBASE_OPERATOR)
        app_pod = app_pod_list[random.randint(0, len(app_pod_list) - 1)]
        log.info(f"respin pod {app_pod}")
        app_pod_obj = get_pod_obj(app_pod,
                                  namespace=constants.COUCHBASE_OPERATOR)
        app_pod_obj.delete(wait=True, force=False)
        wait_for_resource_state(resource=app_pod_obj,
                                state=constants.STATUS_RUNNING,
                                timeout=300)

    def get_couchbase_nodes(self):
        """
        Get nodes that contain a couchbase app pod

        Returns:
            list: List of nodes

        """
        app_pods_list = get_pod_name_by_pattern("cb-example",
                                                constants.COUCHBASE_OPERATOR)
        app_pod_objs = list()
        for pod in app_pods_list:
            app_pod_objs.append(
                get_pod_obj(pod, namespace=constants.COUCHBASE_OPERATOR))
        log.info("Create a list of nodes that contain a couchbase app pod")
        nodes_set = set()
        for pod in app_pod_objs:
            log.info(f"pod {pod.name} located on "
                     f"node {pod.get().get('spec').get('nodeName')}")
            nodes_set.add(pod.get().get("spec").get("nodeName"))
        return list(nodes_set)

    def teardown(self):
        """
        Cleaning up the resources created during Couchbase deployment

        """
        if self.cb_create_cb_secret:
            self.cb_secrets.delete()
        if self.cb_create_cb_cluster:
            self.cb_example.delete()
        if self.cb_create_bucket:
            self.cb_bucket.delete()
        self.subscription_yaml.delete()
        switch_to_project("default")
        self.ns_obj.delete_project(constants.COUCHBASE_OPERATOR)
        self.ns_obj.wait_for_delete(resource_name=constants.COUCHBASE_OPERATOR,
                                    timeout=90)
        PillowFight.cleanup(self)
        switch_to_default_rook_cluster_project()
Beispiel #18
0
class CouchBase(PillowFight):
    """
    CouchBase workload operation
    """
    WAIT_FOR_TIME = 1800
    admission_parts = [
        constants.COUCHBASE_ADMISSION_SERVICE_ACCOUNT_YAML,
        constants.COUCHBASE_ADMISSION_CLUSTER_ROLE_YAML,
        constants.COUCHBASE_ADMISSION_CLUSTER_ROLE_BINDING_YAML,
        constants.COUCHBASE_ADMISSION_SECRET_YAML,
        constants.COUCHBASE_ADMISSION_DEPLOYMENT_YAML,
        constants.COUCHBASE_ADMISSION_SERVICE_YAML,
        constants.COUCHBASE_MUTATING_WEBHOOK_YAML,
        constants.COUCHBASE_VALIDATING_WEBHOOK_YAML
    ]
    pod_obj = OCP(kind='pod')
    couchbase_pod = OCP(kind='pod')
    secretsadder = OCP(kind='pod')
    admission_pod = []
    cb_worker = OCS()
    cb_examples = OCS()

    def __init__(self, **kwargs):
        """
        Initializer function

        """
        super().__init__(**kwargs)

    def is_up_and_running(self, pod_name, ocp_value):
        """
        Test if the pod specified is up and running.

        Args:
            pod_name (str): Name of pod being checked.
            ocp_value (object): object used for running oc commands

        Returns:
            bool: True if pod is running, False otherwise

        """
        if not pod_name:
            return False
        pod_info = ocp_value.exec_oc_cmd(f"get pods {pod_name} -o json")
        if pod_info['status']['containerStatuses'][0]['ready']:
            if 'running' in pod_info['status']['containerStatuses'][0][
                    'state']:
                return True
        return False

    def setup_cb(self):
        """
        Creating admission parts, couchbase operator pod, couchbase worker secret

        """
        # Create admission controller
        log.info("Create admission controller process for Couchbase")
        switch_to_project('default')
        self.up_adm_chk = OCP(namespace="default")
        self.up_check = OCP(namespace=constants.COUCHBASE_OPERATOR)
        for adm_yaml in self.admission_parts:
            adm_data = templating.load_yaml(adm_yaml)
            adm_obj = OCS(**adm_data)
            adm_obj.create()

        # Wait for admission pod to be created
        for adm_pod in TimeoutSampler(self.WAIT_FOR_TIME, 3,
                                      get_pod_name_by_pattern,
                                      'couchbase-operator-admission',
                                      'default'):
            try:
                if self.is_up_and_running(adm_pod[0], self.up_adm_chk):
                    self.admission_pod = adm_pod[0]
                    break
            except IndexError:
                log.info("Admission pod is not ready yet")

        # Wait for admission pod to be running
        log.info("Waiting for admission pod to be running")
        self.pod_obj.wait_for_resource(
            condition='Running',
            resource_name=self.admission_pod,
            timeout=self.WAIT_FOR_TIME,
            sleep=10,
        )
        self.pod_obj.new_project(constants.COUCHBASE_OPERATOR)
        couchbase_data = templating.load_yaml(constants.COUCHBASE_CRD_YAML)
        self.couchbase_obj = OCS(**couchbase_data)
        self.couchbase_obj.create()
        op_data = templating.load_yaml(constants.COUCHBASE_OPERATOR_ROLE)
        self.operator_role = OCS(**op_data)
        self.operator_role.create()
        self.serviceaccount = OCP(namespace=constants.COUCHBASE_OPERATOR)
        self.serviceaccount.exec_oc_cmd(
            "create serviceaccount couchbase-operator")

        dockercfgs = self.serviceaccount.exec_oc_cmd("get secrets")
        startloc = dockercfgs.find('couchbase-operator-dockercfg')
        newdockerstr = dockercfgs[startloc:]
        endloc = newdockerstr.find(' ')
        dockerstr = newdockerstr[:endloc]
        self.secretsadder.exec_oc_cmd(
            f"secrets link serviceaccount/couchbase-operator secrets/{dockerstr}"
        )
        self.rolebinding = OCP(namespace=constants.COUCHBASE_OPERATOR)
        rolebind_cmd = "".join([
            "create rolebinding couchbase-operator-rolebinding ",
            "--role couchbase-operator ",
            "--serviceaccount couchbase-operator-namespace:couchbase-operator"
        ])
        self.rolebinding.exec_oc_cmd(rolebind_cmd)
        dep_data = templating.load_yaml(constants.COUCHBASE_OPERATOR_DEPLOY)
        self.cb_deploy = OCS(**dep_data)
        self.cb_deploy.create()
        # Wait for couchbase operator pod to be running
        for couchbase_pod in TimeoutSampler(self.WAIT_FOR_TIME, 3,
                                            get_pod_name_by_pattern,
                                            'couchbase-operator',
                                            constants.COUCHBASE_OPERATOR):
            try:
                if self.is_up_and_running(couchbase_pod[0], self.up_check):
                    break
            except IndexError:
                log.info("Couchbase operator is not up")

        cb_work = templating.load_yaml(constants.COUCHBASE_WORKER_SECRET)
        self.cb_worker = OCS(**cb_work)
        self.cb_worker.create()

    def create_couchbase_worker(self, replicas=1):
        """
        Deploy a Couchbase server and pillowfight workload using operator

        The couchbase workers do not come up unless there is an admission controller
        running.  The admission controller is started from the default project prior
        to bringing up the operator.  Secrets, rolebindings and serviceaccounts
        need to also be generated.

        Once the couchbase operator is running, we need to wait for the three
        worker pods to also be up.  Then a pillowfight task is started.

        After the pillowfight task has finished, the log is collected and
        analyzed.

        Raises:
            Exception: If pillowfight results indicate that a minimum performance
                level is not reached (1 second response time, less than 1000 ops
                per second)

        """
        logging.info('Creating pods..')
        cb_example = templating.load_yaml(constants.COUCHBASE_WORKER_EXAMPLE)
        cb_example['spec']['servers'][0]['size'] = replicas
        self.cb_examples = OCS(**cb_example)
        self.cb_examples.create()

        # Wait for last of three workers to be running.

        logging.info('Waiting for the pods to reach Running state')
        for cb_wrk_pods in TimeoutSampler(self.WAIT_FOR_TIME, 3,
                                          get_pod_name_by_pattern,
                                          'cb-example',
                                          constants.COUCHBASE_OPERATOR):
            try:
                if len(cb_wrk_pods) == replicas:
                    counter = 0
                    for cb_pod in cb_wrk_pods:
                        if self.is_up_and_running(cb_pod, self.up_check):
                            counter += 1
                            logging.info(f'Couchbase worker {cb_pod} is up')
                    if counter == replicas:
                        break
            except IndexError:
                logging.info(
                    f'Expected number of couchbase pods are {replicas} '
                    f'but only found {len(cb_wrk_pods)}')

    def run_workload(self,
                     replicas,
                     num_items=None,
                     num_threads=None,
                     run_in_bg=False):
        """
        Running workload with pillow fight operator

        Args:
            replicas (int): Number of pods
            num_items (int): Number of items to be loaded to the cluster
            num_threads (int): Number of threads
            run_in_bg (bool) : Optional run IOs in background

        """
        self.result = None
        logging.info('Running IOs...')
        if run_in_bg:
            executor = ThreadPoolExecutor(1)
            self.result = executor.submit(PillowFight.run_pillowfights,
                                          self,
                                          replicas=replicas,
                                          num_items=num_items,
                                          num_threads=num_threads)
            return self.result
        PillowFight.run_pillowfights(self,
                                     replicas=replicas,
                                     num_items=num_items,
                                     num_threads=num_threads)

    def analyze_run(self, skip_analyze=False):
        """
        Analyzing the workload run logs

        Args:
            skip_analyze (bool): Option to skip logs analysis

        """
        if not skip_analyze:
            logging.info('Analyzing workload run logs...')
            PillowFight.analyze_all(self)

    def respin_couchbase_app_pod(self):
        """
        Respin the couchbase app pod

        Returns:
            pod status

        """
        app_pod_list = get_pod_name_by_pattern('cb-example',
                                               constants.COUCHBASE_OPERATOR)
        app_pod = app_pod_list[random.randint(0, len(app_pod_list) - 1)]
        logging.info(f"respin pod {app_pod}")
        app_pod_obj = get_pod_obj(app_pod,
                                  namespace=constants.COUCHBASE_OPERATOR)
        app_pod_obj.delete(wait=True, force=False)
        wait_for_resource_state(resource=app_pod_obj,
                                state=constants.STATUS_RUNNING,
                                timeout=300)

    def get_couchbase_nodes(self):
        """
        Get nodes that contain a couchbase app pod

        Returns:
            list: List of nodes

        """
        app_pods_list = get_pod_name_by_pattern('cb-example',
                                                constants.COUCHBASE_OPERATOR)
        app_pod_objs = list()
        for pod in app_pods_list:
            app_pod_objs.append(
                get_pod_obj(pod, namespace=constants.COUCHBASE_OPERATOR))

        log.info("Create a list of nodes that contain a couchbase app pod")
        nodes_set = set()
        for pod in app_pod_objs:
            logging.info(f"pod {pod.name} located on "
                         f"node {pod.get().get('spec').get('nodeName')}")
            nodes_set.add(pod.get().get('spec').get('nodeName'))
        return list(nodes_set)

    def teardown(self):
        """
        Delete objects created in roughly reverse order of how they were created.

        """
        self.cb_examples.delete()
        self.cb_worker.delete()
        self.cb_deploy.delete()
        self.pod_obj.exec_oc_cmd(
            command="delete rolebinding couchbase-operator-rolebinding")
        self.pod_obj.exec_oc_cmd(
            command="delete serviceaccount couchbase-operator")
        self.operator_role.delete()
        self.couchbase_obj.delete()
        switch_to_project('default')
        self.pod_obj.delete_project(constants.COUCHBASE_OPERATOR)
        for adm_yaml in self.admission_parts:
            adm_data = templating.load_yaml(adm_yaml)
            adm_obj = OCS(**adm_data)
            adm_obj.delete()
        # Before the code below was added, the teardown task would sometimes
        # fail with the leftover objects because it would still see one of the
        # couchbase pods.
        for admin_pod in TimeoutSampler(self.WAIT_FOR_TIME, 3,
                                        get_pod_name_by_pattern, 'couchbase',
                                        'default'):
            if admin_pod:
                continue
            else:
                break
        PillowFight.cleanup(self)
        switch_to_default_rook_cluster_project()
Beispiel #19
0
class AMQ(object):
    """
    Workload operation using AMQ
    """

    def __init__(self, **kwargs):
        """
        Initializer function

        Args:
            kwargs (dict):
                Following kwargs are valid
                namespace: namespace for the operator
                repo: AMQ repo where all necessary yaml file are there - a github link
                branch: branch to use from the repo
        """
        self.args = kwargs
        self.repo = self.args.get('repo', constants.OCS_WORKLOADS)
        self.branch = self.args.get('branch', 'master')
        self.namespace = self.args.get('namespace', 'my-project')
        self.amq_is_setup = False
        self.ocp = OCP()
        self.ns_obj = OCP(kind='namespace')
        self.pod_obj = OCP(kind='pod')
        self.kafka_obj = OCP(kind='Kafka')
        self.kafka_connect_obj = OCP(kind="KafkaConnect")
        self.kafka_bridge_obj = OCP(kind="KafkaBridge")
        self._create_namespace()
        self._clone_amq()

    def _create_namespace(self):
        """
        create namespace for amq
        """
        self.ocp.new_project(self.namespace)

    def _clone_amq(self):
        """
        clone the amq repo
        """
        self.dir = tempfile.mkdtemp(prefix='amq_')
        try:
            log.info(f'cloning amq in {self.dir}')
            git_clone_cmd = f'git clone -b {self.branch} {self.repo} '
            run(
                git_clone_cmd,
                shell=True,
                cwd=self.dir,
                check=True
            )
            self.amq_dir = "ocs-workloads/amq/v1/install/cluster-operator"
            self.amq_dir_examples = "ocs-workloads/amq/v1/examples/templates/cluster-operator"
            self.amq_kafka_pers_yaml = "ocs-workloads/amq/v1/kafka-persistent.yaml"
            self.amq_kafka_connect_yaml = "ocs-workloads/amq/v1/kafka-connect.yaml"
            self.amq_kafka_bridge_yaml = "ocs-workloads/amq/v1/kafka-bridge.yaml"

        except (CommandFailed, CalledProcessError) as cf:
            log.error('Error during cloning of amq repository')
            raise cf

    def setup_amq_cluster_operator(self):
        """
        Function to setup amq-cluster_operator,
        the file file is pulling from github
        it will make sure cluster-operator pod is running
        """

        # self.amq_dir = constants.TEMPLATE_DEPLOYMENT_AMQ_CP
        run(f'oc apply -f {self.amq_dir} -n {self.namespace}', shell=True, check=True, cwd=self.dir)
        time.sleep(5)
        # Wait for strimzi-cluster-operator pod to be created
        if self.is_amq_pod_running(pod_pattern="cluster-operator"):
            log.info("strimzi-cluster-operator pod is in running state")
        else:
            raise ResourceWrongStatusException("strimzi-cluster-operator pod is not getting to running state")

        run(f'oc apply -f {self.amq_dir_examples} -n {self.namespace}', shell=True, check=True, cwd=self.dir)
        # checking pod status one more time
        if self.is_amq_pod_running(pod_pattern="cluster-operator"):
            log.info("strimzi-cluster-operator pod is in running state")
        else:
            raise ResourceWrongStatusException("strimzi-cluster-operator pod is not getting to running state")

    def is_amq_pod_running(self, pod_pattern="cluster-operator"):
        """
        The function checks if provided pod_pattern finds a pod and if the status is running or not
        Args:
            pod_pattern (str): the pattern for pod
        Returns:
            bool: status of pod: True if found pod is running
        """
        for pod in TimeoutSampler(
            300, 10, get_pod_name_by_pattern, pod_pattern, self.namespace
        ):
            try:
                if pod[0] is not None:
                    amq_pod = pod[0]
                    break
            except IndexError as ie:
                log.error(pod_pattern + " pod not ready yet")
                raise ie
        # checking pod status
        if self.pod_obj.wait_for_resource(
            condition='Running',
            resource_name=amq_pod,
            timeout=1600,
            sleep=30,
        ):
            log.info(amq_pod + " pod is up and running")
            return True
        else:
            return False

    def setup_amq_kafka_persistent(self):
        """
        Function to setup amq-kafka-persistent, the file file is pulling from github
        it will make kind: Kafka and will make sure the status is running
        :return: kafka_persistent
        """

        try:
            kafka_persistent = templating.load_yaml(os.path.join(self.dir, self.amq_kafka_pers_yaml))
            self.kafka_persistent = OCS(**kafka_persistent)
            self.kafka_persistent.create()

        except(CommandFailed, CalledProcessError) as cf:
            log.error('Failed during setup of AMQ Kafka-persistent')
            raise cf
        time.sleep(5)
        if self.is_amq_pod_running(pod_pattern="zookeeper"):
            return self.kafka_persistent
        else:
            raise ResourceWrongStatusException("my-cluster-zookeeper Pod is not getting to running state")

    def setup_amq_kafka_connect(self):
        """
        The function is to setup amq-kafka-connect, the yaml file is pulling from github
        it will make kind: KafkaConnect and will make sure the status is running

        Returns: kafka_connect object
        """
        try:
            kafka_connect = templating.load_yaml(os.path.join(self.dir, self.amq_kafka_connect_yaml))
            self.kafka_connect = OCS(**kafka_connect)
            self.kafka_connect.create()
        except(CommandFailed, CalledProcessError) as cf:
            log.error('Failed during setup of AMQ KafkaConnect')
            raise cf

        if self.is_amq_pod_running(pod_pattern="my-connect-cluster-connect"):
            return self.kafka_connect
        else:
            raise ResourceWrongStatusException("my-connect-cluster-connect pod is not getting to running state")

    def setup_amq_kafka_bridge(self):
        """
        Function to setup amq-kafka, the file file is pulling from github
        it will make kind: KafkaBridge and will make sure the pod status is running

        Return: kafka_bridge object
        """
        try:
            kafka_bridge = templating.load_yaml(os.path.join(self.dir, self.amq_kafka_bridge_yaml))
            self.kafka_bridge = OCS(**kafka_bridge)
            self.kafka_bridge.create()
        except(CommandFailed, CalledProcessError) as cf:
            log.error('Failed during setup of AMQ KafkaBridge')
            raise cf
        # Making sure the kafka_bridge is running
        if self.is_amq_pod_running(pod_pattern="my-bridge-bridge"):
            return self.kafka_bridge
        else:
            raise ResourceWrongStatusException("kafka_bridge_pod pod is not getting to running state")

    def setup_amq(self):
        """
        Setup AMQ from local folder,
        function will call all necessary sub functions to make sure amq installation is complete
        """
        self.setup_amq_cluster_operator()
        self.setup_amq_kafka_persistent()
        self.setup_amq_kafka_connect()
        self.setup_amq_kafka_bridge()
        self.amq_is_setup = True
        return self

    def cleanup(self):
        """
        Clean up function,
        will start to delete from amq cluster operator
        then amq-connector, persistent, bridge, at the end it will delete the created namespace
        """
        if self.amq_is_setup:
            self.kafka_persistent.delete()
            self.kafka_connect.delete()
            self.kafka_bridge.delete()
            run_cmd(f'oc delete -f {self.amq_dir}', shell=True, check=True, cwd=self.dir)
            run_cmd(f'oc delete -f {self.amq_dir_examples}', shell=True, check=True, cwd=self.dir)
        run_cmd(f'oc delete project {self.namespace}')
        # Reset namespace to default
        switch_to_default_rook_cluster_project()
        self.ns_obj.wait_for_delete(resource_name=self.namespace)
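
A minimal usage sketch for the AMQ class above, assuming the ocs_ci.ocs.amq import path; the kwargs and method calls are the ones the class itself defines:

from ocs_ci.ocs.amq import AMQ  # assumed import path

amq = AMQ(namespace='my-project', branch='master')
try:
    amq.setup_amq()    # cluster operator, Kafka persistent, KafkaConnect, KafkaBridge
finally:
    amq.cleanup()      # delete the AMQ resources and the namespace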
Beispiel #20
0
    def test_fio_workload_simple(self, ripsaw, es, interface, io_pattern):
        """
        This is a basic fio perf test
        """
        # Deployment ripsaw
        log.info("Deploying ripsaw operator")
        ripsaw.apply_crd('resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml')
        sc = 'ocs-storagecluster-ceph-rbd' if interface == 'CephBlockPool' else 'ocs-storagecluster-cephfs'

        # Create fio benchmark
        log.info("Create resource file for fio workload")
        fio_cr = templating.load_yaml(constants.FIO_CR_YAML)

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        es_server = ""
        es_port = ""
        if 'elasticsearch' in fio_cr['spec']:
            if 'server' in fio_cr['spec']['elasticsearch']:
                es_server = fio_cr['spec']['elasticsearch']['server']
            if 'port' in fio_cr['spec']['elasticsearch']:
                es_port = fio_cr['spec']['elasticsearch']['port']
        else:
            fio_cr['spec']['elasticsearch'] = {}

        # Use the internally defined elastic-search server in the test
        fio_cr['spec']['elasticsearch'] = {
            'server': es.get_ip(),
            'port': es.get_port()
        }

        # Setting the data set to 40% of the total storage capacity but
        # not more than 600GiB
        ceph_cluster = CephCluster()
        total_data_set = int(ceph_cluster.get_ceph_capacity() * 0.4)
        filesize = int(fio_cr['spec']['workload']['args']['filesize'].replace(
            'GiB', ''))
        # To make sure the number of app pods will not be more than 50, in case
        # of a large data set, change the size of the file each pod will work on
        if total_data_set > 500:
            filesize = int(ceph_cluster.get_ceph_capacity() * 0.008)
            fio_cr['spec']['workload']['args']['filesize'] = f'{filesize}GiB'
            # make sure that the storage size is larger than the file size
            fio_cr['spec']['workload']['args'][
                'storagesize'] = f'{int(filesize * 1.2)}Gi'
        fio_cr['spec']['workload']['args']['servers'] = int(total_data_set /
                                                            filesize)
        log.info(f'Total Data set to work on is : {total_data_set} GiB')
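        # Worked example with hypothetical numbers: if get_ceph_capacity()
        # returns 2000 GiB, total_data_set = int(2000 * 0.4) = 800 GiB; since
        # 800 > 500, filesize = int(2000 * 0.008) = 16 GiB, storagesize
        # becomes int(16 * 1.2) = 19 Gi and servers = int(800 / 16) = 50 pods.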

        fio_cr['spec']['clustername'] = config.ENV_DATA[
            'platform'] + get_build() + get_ocs_version()
        fio_cr['spec']['test_user'] = get_ocs_version(
        ) + interface + io_pattern
        fio_cr['spec']['workload']['args']['storageclass'] = sc
        if io_pattern == 'sequential':
            fio_cr['spec']['workload']['args']['jobs'] = ['write', 'read']
        log.info(f'fio_cr: {fio_cr}')
        fio_cr_obj = OCS(**fio_cr)
        fio_cr_obj.create()

        # Wait for fio client pod to be created
        for fio_pod in TimeoutSampler(300, 20, get_pod_name_by_pattern,
                                      'fio-client',
                                      constants.RIPSAW_NAMESPACE):
            try:
                if fio_pod[0] is not None:
                    fio_client_pod = fio_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        # Wait for fio pod to initialize and complete
        log.info("Waiting for fio_client to complete")
        pod_obj = OCP(kind='pod')
        pod_obj.wait_for_resource(
            condition='Completed',
            resource_name=fio_client_pod,
            timeout=18000,
            sleep=300,
        )

        output = run_cmd(f'oc logs {fio_client_pod}')

        if 'Fio failed to execute' not in output:
            log.info("FIO has completed successfully")
        else:
            log.error("FIO failed to complete")

        # Clean up fio benchmark
        log.info("Deleting FIO benchmark")
        fio_cr_obj.delete()

        # Setting back the original elastic-search information
        fio_cr['spec']['elasticsearch'] = {
            'server': es_server,
            'port': es_port
        }
        analyze_regression(io_pattern,
                           sc,
                           es_username=fio_cr['spec']['test_user'])
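
A minimal sketch of how the interface and io_pattern arguments of the test above are typically supplied via pytest parametrization; the decorator, the class name and the concrete values are assumptions, since they are not part of the snippet:

import pytest

class TestFIOBenchmark:
    @pytest.mark.parametrize(
        argnames=["interface", "io_pattern"],
        argvalues=[
            ("CephBlockPool", "sequential"),
            ("CephBlockPool", "random"),
            ("CephFileSystem", "sequential"),
            ("CephFileSystem", "random"),
        ],
    )
    def test_fio_workload_simple(self, ripsaw, es, interface, io_pattern):
        ...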
Beispiel #21
0
    def test_fio_workload_simple(self, ripsaw, es, interface, io_pattern):
        """
        This is a basic fio perf test

        """

        # Deployment ripsaw
        log.info("Deploying ripsaw operator")
        ripsaw.apply_crd('resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml')
        if interface == 'CephBlockPool':
            sc = constants.CEPHBLOCKPOOL_SC
        else:
            sc = constants.CEPHFILESYSTEM_SC

        # Create fio benchmark
        log.info("Create resource file for fio workload")
        fio_cr = templating.load_yaml(constants.FIO_CR_YAML)

        # Saving the original elastic-search IP and PORT - if defined in yaml
        backup_es = {}
        if 'elasticsearch' in fio_cr['spec']:
            backup_es = fio_cr['spec']['elasticsearch']
        else:
            log.warning(
                'Elastic Search information does not exist in YAML file')
            fio_cr['spec']['elasticsearch'] = {}

        # Use the internally defined elastic-search server in the test - if it exists
        if es:
            fio_cr['spec']['elasticsearch'] = {
                'server': es.get_ip(),
                'port': es.get_port()
            }

        # Setting the data set to 40% of the total storage capacity
        ceph_cluster = CephCluster()
        ceph_capacity = ceph_cluster.get_ceph_capacity()
        total_data_set = int(ceph_capacity * 0.4)
        filesize = int(fio_cr['spec']['workload']['args']['filesize'].replace(
            'GiB', ''))
        # To make sure the number of app pods will not be more than 50, in case
        # of a large data set, change the size of the file each pod will work on
        if total_data_set > 500:
            filesize = int(ceph_capacity * 0.008)
            fio_cr['spec']['workload']['args']['filesize'] = f'{filesize}GiB'
            # make sure that the storage size is larger than the file size
            fio_cr['spec']['workload']['args'][
                'storagesize'] = f'{int(filesize * 1.2)}Gi'
        fio_cr['spec']['workload']['args']['servers'] = int(total_data_set /
                                                            filesize)
        log.info(f'Total Data set to work on is : {total_data_set} GiB')

        environment = get_environment_info()
        if environment['user'] != '':
            fio_cr['spec']['test_user'] = environment['user']
        fio_cr['spec']['clustername'] = environment['clustername']

        log.debug(f'Environment information is : {environment}')

        fio_cr['spec']['workload']['args']['storageclass'] = sc
        if io_pattern == 'sequential':
            fio_cr['spec']['workload']['args']['jobs'] = ['write', 'read']
            fio_cr['spec']['workload']['args']['iodepth'] = 1
        log.info(f'The FIO CR file is {fio_cr}')
        fio_cr_obj = OCS(**fio_cr)
        fio_cr_obj.create()

        # Wait for fio client pod to be created
        for fio_pod in TimeoutSampler(300, 20, get_pod_name_by_pattern,
                                      'fio-client',
                                      constants.RIPSAW_NAMESPACE):
            try:
                if fio_pod[0] is not None:
                    fio_client_pod = fio_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        # Getting the start time of the test
        start_time = time.strftime('%Y-%m-%dT%H:%M:%SGMT', time.gmtime())

        # Getting the UUID from inside the benchmark pod
        uuid = ripsaw.get_uuid(fio_client_pod)
        # Setting back the original elastic-search information
        fio_cr['spec']['elasticsearch'] = backup_es

        full_results = FIOResultsAnalyse(uuid, fio_cr)

        # Initialize the results doc file.
        for key in environment:
            full_results.add_key(key, environment[key])

        # Setting the global parameters of the test
        full_results.add_key('io_pattern', io_pattern)
        full_results.add_key('dataset', f'{total_data_set}GiB')
        full_results.add_key('file_size',
                             fio_cr['spec']['workload']['args']['filesize'])
        full_results.add_key('servers',
                             fio_cr['spec']['workload']['args']['servers'])
        full_results.add_key('samples',
                             fio_cr['spec']['workload']['args']['samples'])
        full_results.add_key('operations',
                             fio_cr['spec']['workload']['args']['jobs'])
        full_results.add_key('block_sizes',
                             fio_cr['spec']['workload']['args']['bs'])
        full_results.add_key('io_depth',
                             fio_cr['spec']['workload']['args']['iodepth'])
        full_results.add_key('jobs',
                             fio_cr['spec']['workload']['args']['numjobs'])
        full_results.add_key(
            'runtime', {
                'read': fio_cr['spec']['workload']['args']['read_runtime'],
                'write': fio_cr['spec']['workload']['args']['write_runtime']
            })
        full_results.add_key(
            'storageclass', fio_cr['spec']['workload']['args']['storageclass'])
        full_results.add_key('vol_size',
                             fio_cr['spec']['workload']['args']['storagesize'])

        # Wait for fio pod to initialize and complete
        log.info("Waiting for fio_client to complete")
        pod_obj = OCP(kind='pod')
        pod_obj.wait_for_resource(
            condition='Completed',
            resource_name=fio_client_pod,
            timeout=18000,
            sleep=300,
        )

        # Getting the end time of the test
        end_time = time.strftime('%Y-%m-%dT%H:%M:%SGMT', time.gmtime())
        full_results.add_key('test_time', {
            'start': start_time,
            'end': end_time
        })

        output = run_cmd(f'oc logs {fio_client_pod}')
        log.info(f'The Test log is : {output}')

        if 'Fio failed to execute' not in output:
            log.info("FIO has completed successfully")
        else:
            log.error("FIO failed to complete")

        # Clean up fio benchmark
        log.info("Deleting FIO benchmark")
        fio_cr_obj.delete()

        log.debug(f'Full results is : {full_results.results}')

        # If an internal ES exists, copy all data from the internal to the main ES
        if es:
            log.info('Copy all data from Internal ES to Main ES')
            es._copy(full_results.es)
        # Adding this sleep between the copy and the analyzing of the results
        # since sometimes the results of the read (just after write) are empty
        time.sleep(30)
        full_results.analyze_results()  # Analyze the results
        # Writing the analyzed test results to the Elastic-Search server
        full_results.es_write()
        full_results.codespeed_push()  # Push results to codespeed
        # Creating full link to the results on the ES server
        log.info(f'The result can be found at: {full_results.results_link()}')
Beispiel #22
0
class QuayOperator(object):
    """
    Quay operator class

    """
    def __init__(self):
        """
        Quay operator initializer function

        """
        self.namespace = constants.OPENSHIFT_OPERATORS
        self.ocp_obj = ocp.OCP(namespace=self.namespace)
        self.quay_operator = None
        self.quay_registry = None
        self.quay_registry_secret = None
        self.quay_pod_obj = OCP(kind=constants.POD, namespace=self.namespace)
        self.quay_registry_name = ""
        self.quay_operator_csv = ""
        self.quay_registry_secret_name = ""
        self.sc_default = False
        self.sc_name = (constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD
                        if storagecluster_independent_check() else
                        constants.DEFAULT_STORAGECLASS_RBD)

    def setup_quay_operator(self):
        """
        Deploys Quay operator

        """
        quay_operator_data = templating.load_yaml(file=constants.QUAY_SUB)
        self.quay_operator = OCS(**quay_operator_data)
        logger.info(f"Installing Quay operator: {self.quay_operator.name}")
        self.quay_operator.create()
        for quay_pod in TimeoutSampler(300, 10, get_pod_name_by_pattern,
                                       constants.QUAY_OPERATOR,
                                       self.namespace):
            if quay_pod:
                self.quay_pod_obj.wait_for_resource(
                    condition=constants.STATUS_RUNNING,
                    resource_name=quay_pod[0],
                    sleep=30,
                    timeout=600,
                )
                break
        self.quay_operator_csv = get_csvs_start_with_prefix(
            csv_prefix=constants.QUAY_OPERATOR,
            namespace=self.namespace,
        )[0]["metadata"]["name"]

    def create_quay_registry(self):
        """
        Creates Quay registry

        """
        if not helpers.get_default_storage_class():
            patch = ' \'{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}\' '
            run_cmd(f"oc patch storageclass {self.sc_name} "
                    f"-p {patch} "
                    f"--request-timeout=120s")
            self.sc_default = True
        self.quay_registry_secret_name = create_unique_resource_name(
            "quay-user", "secret")
        logger.info(
            f"Creating Quay registry config for super-user access: {self.quay_registry_secret_name}"
        )
        self.quay_registry_secret = self.ocp_obj.exec_oc_cmd(
            command=
            f"create secret generic --from-file config.yaml={constants.QUAY_SUPER_USER} "
            f"{self.quay_registry_secret_name}")
        quay_registry_data = templating.load_yaml(file=constants.QUAY_REGISTRY)
        self.quay_registry_name = quay_registry_data["metadata"]["name"]
        quay_registry_data["spec"][
            "configBundleSecret"] = self.quay_registry_secret_name
        self.quay_registry = OCS(**quay_registry_data)
        logger.info(f"Creating Quay registry: {self.quay_registry.name}")
        self.quay_registry.create()
        logger.info("Waiting for 15s for registry to get initialized")
        sleep(15)
        self.wait_for_quay_endpoint()

    def wait_for_quay_endpoint(self):
        """
        Waits for quay registry endpoint

        """
        logger.info("Waiting for quay registry endpoint to be up")
        sample = TimeoutSampler(
            timeout=300,
            sleep=15,
            func=self.check_quay_registry_endpoint,
        )
        if not sample.wait_for_func_status(result=True):
            logger.error("Quay registry endpoint did not get created.")
            raise TimeoutExpiredError
        else:
            logger.info("Quay registry endpoint is up")

    def check_quay_registry_endpoint(self):
        """
        Checks if quay registry endpoint is up

        Returns:
            bool: True if quay endpoint is up else False

        """
        return bool(
            self.quay_registry.get().get("status").get("registryEndpoint"))

    def get_quay_endpoint(self):
        """
        Returns quay endpoint

        """
        return self.quay_registry.get().get("status").get("registryEndpoint")

    def teardown(self):
        """
        Quay operator teardown

        """
        if self.sc_default:
            patch = ' \'{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}\' '
            run_cmd(f"oc patch storageclass {self.sc_name} "
                    f"-p {patch} "
                    f"--request-timeout=120s")
        if self.quay_registry_secret:
            self.ocp_obj.exec_oc_cmd(
                f"delete secret {self.quay_registry_secret_name}")
        if self.quay_registry:
            self.quay_registry.delete()
        if self.quay_operator:
            self.quay_operator.delete()
        if self.quay_operator_csv:
            self.ocp_obj.exec_oc_cmd(
                f"delete {constants.CLUSTER_SERVICE_VERSION} "
                f"{self.quay_operator_csv}")
Beispiel #23
0
    def test_run_pgsql(self, transactions, pod_name):
        """
        Test pgsql workload
        """
        # Create pgbench benchmark
        log.info("Create resource file for pgbench workload")
        pg_trans = transactions
        timeout = pg_trans * 3
        pg_data = templating.load_yaml(constants.PGSQL_BENCHMARK_YAML)
        pg_data['spec']['workload']['args']['transactions'] = pg_trans
        pg_obj = OCS(**pg_data)
        pg_obj.create()

        # Wait for pgbench pod to be created
        for pgbench_pod in TimeoutSampler(
            pg_trans, 3, get_pod_name_by_pattern,
            'pgbench', 'my-ripsaw'
        ):
            try:
                if pgbench_pod[0] is not None:
                    pgbench_client_pod = pgbench_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        # Respin Ceph pod
        resource_osd = [f'{pod_name}']
        log.info(f"Respin Ceph pod {pod_name}")
        disruption = disruption_helpers.Disruptions()
        for resource in resource_osd:
            disruption.set_resource(resource=resource)
            disruption.delete_resource()

        # Wait for pgbench pod to initialize and complete
        log.info("Waiting for pgbench_client to complete")
        pod_obj = OCP(kind='pod')
        pod_obj.wait_for_resource(
            condition='Completed',
            resource_name=pgbench_client_pod,
            timeout=timeout,
            sleep=10,
        )

        # Running pgbench and parsing logs
        output = run_cmd(f'oc logs {pgbench_client_pod}')
        pg_output = utils.parse_pgsql_logs(output)
        log.info(
            "*******PGBench output log*********\n"
            f"{pg_output}"
        )
        for data in pg_output:
            latency_avg = data['latency_avg']
            if not latency_avg:
                raise UnexpectedBehaviour(
                    "PGBench failed to run, no data found on latency_avg"
                )
        log.info("PGBench has completed successfully")

        # Collect data and export to Google doc spreadsheet
        g_sheet = GoogleSpreadSheetAPI(sheet_name="OCS PGSQL", sheet_index=2)
        for lat in pg_output:
            lat_avg = lat['latency_avg']
            lat_stddev = lat['lat_stddev']
            tps_incl = lat['tps_incl']
            tps_excl = lat['tps_excl']
            g_sheet.insert_row(
                [int(lat_avg),
                 int(lat_stddev),
                 int(tps_incl),
                 int(tps_excl)], 2
            )
        # Clean up pgbench benchmark
        log.info("Deleting PG bench benchmark")
        pg_obj.delete()
Beispiel #24
0
class Postgresql(RipSaw):
    """
    Postgresql workload operation
    """
    def __init__(self, **kwargs):
        """
        Initializer function

        """
        super().__init__(**kwargs)
        # Initialized here so that cleanup() works even if setup was never run
        self.pgsql_is_setup = False
        self._apply_crd(crd=RIPSAW_CRD)

    def _apply_crd(self, crd):
        """
        Apply the CRD

        Args:
            crd (str): yaml to apply

        """
        RipSaw.apply_crd(self, crd=crd)

    def setup_postgresql(self, replicas):
        """
        Deploy postgres sql server

        Args:
            replicas (int): Number of postgresql pods to be deployed

        Raises:
            CommandFailed: If PostgreSQL server setup fails

        """
        log.info("Deploying postgres database")
        try:
            pgsql_service = templating.load_yaml(constants.PGSQL_SERVICE_YAML)
            pgsql_cmap = templating.load_yaml(constants.PGSQL_CONFIGMAP_YAML)
            pgsql_sset = templating.load_yaml(constants.PGSQL_STATEFULSET_YAML)
            pgsql_sset['spec']['replicas'] = replicas
            self.pgsql_service = OCS(**pgsql_service)
            self.pgsql_service.create()
            self.pgsql_cmap = OCS(**pgsql_cmap)
            self.pgsql_cmap.create()
            self.pgsql_sset = OCS(**pgsql_sset)
            self.pgsql_sset.create()
            self.pod_obj.wait_for_resource(condition='Running',
                                           selector='app=postgres',
                                           resource_count=replicas,
                                           timeout=3600)
        except (CommandFailed, CalledProcessError) as cf:
            log.error('Failed during setup of PostgreSQL server')
            raise cf
        self.pgsql_is_setup = True
        log.info("Successfully deployed postgres database")

    def create_pgbench_benchmark(self,
                                 replicas,
                                 clients=None,
                                 threads=None,
                                 transactions=None,
                                 scaling_factor=None,
                                 timeout=None):
        """
        Create pgbench benchmark pods

        Args:
            replicas (int): Number of pgbench pods to be deployed
            clients (int): Number of clients
            threads (int): Number of threads
            transactions (int): Number of transactions
            scaling_factor (int): scaling factor
            timeout (int): Time in seconds to wait

        Returns:
            List: pgbench pod objects list

        """
        pg_obj_list = []
        for i in range(replicas):
            log.info("Create resource file for pgbench workload")
            pg_data = templating.load_yaml(constants.PGSQL_BENCHMARK_YAML)
            pg_data['metadata']['name'] = 'pgbench-benchmark' + f"{i}"
            pg_data['spec']['workload']['args']['databases'][0][
                'host'] = "postgres-" + f"{i}" + ".postgres"

            if clients is not None:
                pg_data['spec']['workload']['args']['clients'][0] = clients
            if threads is not None:
                pg_data['spec']['workload']['args']['threads'] = threads
            if transactions is not None:
                pg_data['spec']['workload']['args'][
                    'transactions'] = transactions
            if scaling_factor is not None:
                pg_data['spec']['workload']['args'][
                    'scaling_factor'] = scaling_factor
            pg_obj = OCS(**pg_data)
            pg_obj_list.append(pg_obj)
            pg_obj.create()
        # Confirm that the expected number of pgbench pods are spun up
        log.info("Waiting for the expected number of pgbench pods")
        timeout = timeout if timeout else 300
        for pgbench_pods in TimeoutSampler(timeout, replicas,
                                           get_pod_name_by_pattern,
                                           'pgbench-1-dbs-client',
                                           RIPSAW_NAMESPACE):
            try:
                if len(pgbench_pods) == replicas:
                    log.info(f"Expected number of pgbench pods are "
                             f"found: {replicas}")
                    break
            except IndexError:
                log.info(f'Expected number of pgbench pods are {replicas} '
                         f'but only found {len(pgbench_pods)}')
        return pg_obj_list

    def get_postgres_pods(self):
        """
        Get all postgres pods
        Returns:
            List: postgres pod objects list
        """
        return get_all_pods(namespace=RIPSAW_NAMESPACE, selector=['postgres'])

    def get_pgbench_pods(self):
        """
        Get all pgbench pods

        Returns:
            List: pgbench pod objects list

        """
        return [
            get_pod_obj(pod)
            for pod in get_pod_name_by_pattern('pgbench', RIPSAW_NAMESPACE)
        ]

    def delete_pgbench_pods(self, pg_obj_list):
        """
        Delete all pgbench pods on cluster

        Returns:
            bool: True if deleted, False otherwise

        """
        log.info("Delete pgbench Benchmark")
        for pgbench_pod in pg_obj_list:
            pgbench_pod.delete(force=True)

    def is_pgbench_running(self):
        """
        Check if pgbench is running

        Returns:
            bool: True if pgbench is running; False otherwise

        """
        pod_objs = self.get_pgbench_pods()
        for pod in pod_objs:
            # The container state is a dict keyed by the state name,
            # e.g. {'running': {...}} or {'terminated': {...}}
            state = pod.get().get('status').get(
                'containerStatuses')[0].get('state')
            if 'running' in state:
                log.info("One or more pgbench pods are in running state")
                return True
        return False

    def get_pgbench_status(self, pgbench_pod_name):
        """
        Get pgbench status

        Args:
            pgbench_pod_name (str): Name of the pgbench pod

        Returns:
            str: state of pgbench pod (running/completed)

        """
        pod_obj = get_pod_obj(pgbench_pod_name, namespace=RIPSAW_NAMESPACE)
        status = pod_obj.get().get('status').get('containerStatuses')[0].get(
            'state')

        return 'running' if list(
            status.keys())[0] == 'running' else status['terminated']['reason']

    def wait_for_postgres_status(self,
                                 status=constants.STATUS_RUNNING,
                                 timeout=300):
        """
        Wait for postgres pods status to reach running/completed

        Args:
            status (str): status to reach Running or Completed
            timeout (int): Time in seconds to wait

        """
        log.info(f"Waiting for postgres pods to be reach {status} state")
        postgres_pod_objs = self.get_postgres_pods()
        for postgres_pod_obj in postgres_pod_objs:
            wait_for_resource_state(resource=postgres_pod_obj,
                                    state=status,
                                    timeout=timeout)

    def wait_for_pgbench_status(self, status, timeout=None):
        """
        Wait for pgbench benchmark pods status to reach running/completed

        Args:
            status (str): status to reach Running or Completed
            timeout (int): Time in seconds to wait

        """
        """
        Sometimes with the default values in the benchmark yaml the pgbench pod is not
        getting completed within the specified time and the tests are failing.
        I think it is varying with the infrastructure.
        So, for now we set the timeout to 30 mins and will start monitoring each pg bench
        pods for each run.Based on the results we will define the timeout again
        """
        timeout = timeout if timeout else 1800
        # Wait for pg_bench pods to initialized and running
        log.info(f"Waiting for pgbench pods to be reach {status} state")
        pgbench_pod_objs = self.get_pgbench_pods()
        for pgbench_pod_obj in pgbench_pod_objs:
            try:
                wait_for_resource_state(resource=pgbench_pod_obj,
                                        state=status,
                                        timeout=timeout)
            except ResourceWrongStatusException:
                output = run_cmd(f'oc logs {pgbench_pod_obj.name}')
                error_msg = f'{pgbench_pod_obj.name} did not reach to {status} state after {timeout} sec\n{output}'
                log.error(error_msg)
                raise UnexpectedBehaviour(error_msg)

    def validate_pgbench_run(self, pgbench_pods, print_table=True):
        """
        Validate pgbench run

        Args:
            pgbench pods (list): List of pgbench pods

        Returns:
            pg_output (list): pgbench outputs in list

        """
        all_pgbench_pods_output = []
        for pgbench_pod in pgbench_pods:
            log.info(f"pgbench_client_pod===={pgbench_pod.name}====")
            output = run_cmd(f'oc logs {pgbench_pod.name}')
            pg_output = utils.parse_pgsql_logs(output)
            log.info("*******PGBench output log*********\n" f"{pg_output}")
            for data in pg_output:
                run_id = list(data.keys())
                latency_avg = data[run_id[0]]['latency_avg']
                if not latency_avg:
                    raise UnexpectedBehaviour("PGBench failed to run, "
                                              "no data found on latency_avg")
            log.info(f"PGBench on {pgbench_pod.name} completed successfully")
            all_pgbench_pods_output.append((pg_output, pgbench_pod.name))

        if print_table:
            pgbench_pod_table = PrettyTable()
            pgbench_pod_table.field_names = [
                'pod_name', 'scaling_factor', 'num_clients', 'num_threads',
                'trans_client', 'actually_trans', 'latency_avg', 'lat_stddev',
                'tps_incl', 'tps_excl'
            ]
            for pgbench_pod_out in all_pgbench_pods_output:
                for pod_output in pgbench_pod_out[0]:
                    for pod in pod_output.values():
                        pgbench_pod_table.add_row([
                            pgbench_pod_out[1], pod['scaling_factor'],
                            pod['num_clients'], pod['num_threads'],
                            pod['number_of_transactions_per_client'],
                            pod['number_of_transactions_actually_processed'],
                            pod['latency_avg'], pod['lat_stddev'],
                            pod['tps_incl'], pod['tps_excl']
                        ])
            log.info(f'\n{pgbench_pod_table}\n')

        return all_pgbench_pods_output

    def get_pgsql_nodes(self):
        """
        Get nodes that contain a pgsql app pod

        Returns:
            list: Names of the nodes that contain a pgsql app pod

        """
        pgsql_pod_objs = self.pod_obj.get(selector=constants.PGSQL_APP_LABEL,
                                          all_namespaces=True)
        log.info("Create a list of nodes that contain a pgsql app pod")
        nodes_set = set()
        for pod in pgsql_pod_objs['items']:
            log.info(
                f"pod {pod['metadata']['name']} located on node {pod['spec']['nodeName']}"
            )
            nodes_set.add(pod['spec']['nodeName'])
        return list(nodes_set)

    def respin_pgsql_app_pod(self):
        """
        Respin the pgsql app pod

        Returns:
            pod status

        """
        app_pod_list = get_operator_pods(constants.PGSQL_APP_LABEL,
                                         constants.RIPSAW_NAMESPACE)
        app_pod = app_pod_list[random.randint(0, len(app_pod_list) - 1)]
        log.info(f"respin pod {app_pod.name}")
        app_pod.delete(wait=True, force=False)
        wait_for_resource_state(resource=app_pod,
                                state=constants.STATUS_RUNNING,
                                timeout=300)

    def get_pgbech_pod_status_table(self, pgbench_pods):
        """
        Get pgbench pod data and print results on a table

        Args:
            pgbench_pods (list): List of pgbench pods

        """
        pgbench_pod_table = PrettyTable()
        pgbench_pod_table.field_names = [
            'pod_name', 'scaling_factor', 'num_clients', 'num_threads',
            'trans_client', 'actually_trans', 'latency_avg', 'lat_stddev',
            'tps_incl', 'tps_excl'
        ]
        for pgbench_pod in pgbench_pods:
            output = run_cmd(f'oc logs {pgbench_pod.name}')
            pg_output = utils.parse_pgsql_logs(output)
            for pod_output in pg_output:
                for pod in pod_output.values():
                    pgbench_pod_table.add_row([
                        pgbench_pod.name, pod['scaling_factor'],
                        pod['num_clients'], pod['num_threads'],
                        pod['number_of_transactions_per_client'],
                        pod['number_of_transactions_actually_processed'],
                        pod['latency_avg'], pod['lat_stddev'], pod['tps_incl'],
                        pod['tps_excl']
                    ])
        log.info(f'\n{pgbench_pod_table}\n')

    def cleanup(self):
        """
        Clean up

        """
        log.info("Deleting postgres pods and configuration")
        if self.pgsql_is_setup:
            self.pgsql_sset.delete()
            self.pgsql_cmap.delete()
            self.pgsql_service.delete()
        log.info("Deleting pgbench pods")
        pods_obj = self.get_pgbench_pods()
        for pod in pods_obj:
            pod.delete()
            pod.ocp.wait_for_delete(pod.name)
        log.info("Deleting ripsaw configuration")
        RipSaw.cleanup(self)
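
A minimal end-to-end sketch for the Postgresql class above; the import paths are assumptions, while the method calls and their arguments are the ones defined in the class:

from ocs_ci.ocs import constants
from ocs_ci.ocs.pgsql import Postgresql  # assumed import path

pgsql = Postgresql()
try:
    pgsql.setup_postgresql(replicas=3)
    pgsql.wait_for_postgres_status(status=constants.STATUS_RUNNING)
    pgbench_objs = pgsql.create_pgbench_benchmark(replicas=3, transactions=600)
    pgsql.wait_for_pgbench_status(status=constants.STATUS_COMPLETED)
    pgsql.validate_pgbench_run(pgsql.get_pgbench_pods())
finally:
    pgsql.cleanup()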
Beispiel #25
0
    def test_recovery_from_volume_deletion(self, nodes, pvc_factory,
                                           pod_factory):
        """
        Test cluster recovery from disk deletion from the platform side.
        Based on documented procedure detailed in
        https://bugzilla.redhat.com/show_bug.cgi?id=1823183

        """
        logger.info("Picking a PV which to be deleted from the platform side")
        osd_pvs = get_deviceset_pvs()
        osd_pv = random.choice(osd_pvs)
        osd_pv_name = osd_pv.name
        # get the claim name
        logger.info(f"Getting the claim name for OSD PV {osd_pv_name}")
        claim_name = osd_pv.get().get("spec").get("claimRef").get("name")

        # Get the backing volume name
        logger.info(f"Getting the backing volume name for PV {osd_pv_name}")
        backing_volume = nodes.get_data_volumes(pvs=[osd_pv])[0]

        # Get the corresponding PVC
        logger.info(f"Getting the corresponding PVC of PV {osd_pv_name}")
        osd_pvcs = get_deviceset_pvcs()
        osd_pvcs_count = len(osd_pvcs)
        osd_pvc = [
            ds for ds in osd_pvcs
            if ds.get().get("metadata").get("name") == claim_name
        ][0]

        # Get the corresponding OSD pod and ID
        logger.info(f"Getting the OSD pod using PVC {osd_pvc.name}")
        osd_pods = get_osd_pods()
        osd_pods_count = len(osd_pods)
        osd_pod = [
            osd_pod for osd_pod in osd_pods
            if osd_pod.get().get("metadata").get("labels").get(
                constants.CEPH_ROOK_IO_PVC_LABEL) == claim_name
        ][0]
        logger.info(f"OSD_POD {osd_pod.name}")
        osd_id = osd_pod.get().get("metadata").get("labels").get("ceph-osd-id")

        # Get the node that has the OSD pod running on
        logger.info(
            f"Getting the node that has the OSD pod {osd_pod.name} running on")
        osd_node = get_pod_node(osd_pod)
        osd_prepare_pods = get_osd_prepare_pods()
        osd_prepare_pod = [
            pod for pod in osd_prepare_pods if pod.get().get("metadata").get(
                "labels").get(constants.CEPH_ROOK_IO_PVC_LABEL) == claim_name
        ][0]
        osd_prepare_job_name = (osd_prepare_pod.get().get("metadata").get(
            "labels").get("job-name"))
        osd_prepare_job = get_job_obj(osd_prepare_job_name)

        # Get the corresponding OSD deployment
        logger.info(f"Getting the OSD deployment for OSD PVC {claim_name}")
        osd_deployment = [
            osd_pod for osd_pod in get_osd_deployments()
            if osd_pod.get().get("metadata").get("labels").get(
                constants.CEPH_ROOK_IO_PVC_LABEL) == claim_name
        ][0]
        osd_deployment_name = osd_deployment.name

        # Delete the volume from the platform side
        logger.info(f"Deleting {backing_volume} from the platform side")
        nodes.detach_volume(backing_volume, osd_node)

        # Scale down OSD deployment
        logger.info(f"Scaling down OSD deployment {osd_deployment_name} to 0")
        ocp.OCP().exec_oc_cmd(
            f"scale --replicas=0 deployment/{osd_deployment_name}")

        # Force delete OSD pod if necessary
        osd_pod_name = osd_pod.name
        logger.info(f"Waiting for OSD pod {osd_pod.name} to get deleted")
        try:
            osd_pod.ocp.wait_for_delete(resource_name=osd_pod_name)
        except TimeoutError:
            osd_pod.delete(force=True)
            osd_pod.ocp.wait_for_delete(resource_name=osd_pod_name)

        # Run ocs-osd-removal job
        ocp_version = float(get_ocp_version())
        if ocp_version >= 4.6:
            cmd = f"process ocs-osd-removal -p FAILED_OSD_IDS={osd_id} -o yaml"
        else:
            cmd = f"process ocs-osd-removal -p FAILED_OSD_ID={osd_id} -o yaml"

        logger.info(f"Executing OSD removal job on OSD-{osd_id}")
        ocp_obj = ocp.OCP(namespace=config.ENV_DATA["cluster_namespace"])
        osd_removal_job_yaml = ocp_obj.exec_oc_cmd(cmd)
        osd_removal_job = OCS(**osd_removal_job_yaml)
        osd_removal_job.create(do_reload=False)

        # Get ocs-osd-removal pod name
        logger.info("Getting the ocs-osd-removal pod name")
        osd_removal_pod_name = get_osd_removal_pod_name(osd_id)
        osd_removal_pod_obj = get_pod_obj(osd_removal_pod_name,
                                          namespace="openshift-storage")
        osd_removal_pod_obj.ocp.wait_for_resource(
            condition=constants.STATUS_COMPLETED,
            resource_name=osd_removal_pod_name)

        # Verify OSD removal from the ocs-osd-removal pod logs
        logger.info(
            f"Verifying removal of OSD from {osd_removal_pod_name} pod logs")
        logs = get_pod_logs(osd_removal_pod_name)
        pattern = f"purged osd.{osd_id}"
        assert re.search(pattern, logs)

        osd_pvc_name = osd_pvc.name

        if ocp_version < 4.6:
            # Delete the OSD prepare job
            logger.info(f"Deleting OSD prepare job {osd_prepare_job_name}")
            osd_prepare_job.delete()
            osd_prepare_job.ocp.wait_for_delete(
                resource_name=osd_prepare_job_name, timeout=120)

            # Delete the OSD PVC
            logger.info(f"Deleting OSD PVC {osd_pvc_name}")
            osd_pvc.delete()
            osd_pvc.ocp.wait_for_delete(resource_name=osd_pvc_name)

            # Delete the OSD deployment
            logger.info(f"Deleting OSD deployment {osd_deployment_name}")
            osd_deployment.delete()
            osd_deployment.ocp.wait_for_delete(
                resource_name=osd_deployment_name, timeout=120)
        else:
            # If ocp version is '4.6' and above the osd removal job should
            # delete the OSD prepare job, OSD PVC, OSD deployment
            logger.info(
                f"Verifying deletion of OSD prepare job {osd_prepare_job_name}"
            )
            osd_prepare_job.ocp.wait_for_delete(
                resource_name=osd_prepare_job_name, timeout=30)
            logger.info(f"Verifying deletion of OSD PVC {osd_pvc_name}")
            osd_pvc.ocp.wait_for_delete(resource_name=osd_pvc_name, timeout=30)
            logger.info(
                f"Verifying deletion of OSD deployment {osd_deployment_name}")
            osd_deployment.ocp.wait_for_delete(
                resource_name=osd_deployment_name, timeout=30)

        # Delete PV
        logger.info(f"Verifying deletion of PV {osd_pv_name}")
        try:
            osd_pv.ocp.wait_for_delete(resource_name=osd_pv_name)
        except TimeoutError:
            osd_pv.delete()
            osd_pv.ocp.wait_for_delete(resource_name=osd_pv_name)

        if ocp_version < 4.6:
            # Delete the rook ceph operator pod to trigger reconciliation
            rook_operator_pod = get_operator_pods()[0]
            logger.info(
                f"deleting Rook Ceph operator pod {rook_operator_pod.name}")
            rook_operator_pod.delete()

        # Delete the OSD removal job
        logger.info(f"Deleting OSD removal job ocs-osd-removal-{osd_id}")
        osd_removal_job = get_job_obj(f"ocs-osd-removal-{osd_id}")
        osd_removal_job.delete()
        osd_removal_job.ocp.wait_for_delete(
            resource_name=f"ocs-osd-removal-{osd_id}")

        timeout = 600
        # Wait for OSD PVC to get created and reach Bound state
        logger.info(
            "Waiting for a new OSD PVC to get created and reach Bound state")
        assert osd_pvc.ocp.wait_for_resource(
            timeout=timeout,
            condition=constants.STATUS_BOUND,
            selector=constants.OSD_PVC_GENERIC_LABEL,
            resource_count=osd_pvcs_count,
        ), (f"Cluster recovery failed after {timeout} seconds. "
            f"Expected to have {osd_pvcs_count} OSD PVCs in status Bound. Current OSD PVCs status: "
            f"{[pvc.ocp.get_resource(pvc.get().get('metadata').get('name'), 'STATUS') for pvc in get_deviceset_pvcs()]}"
            )
        # Wait for OSD pod to get created and reach Running state
        logger.info(
            "Waiting for a new OSD pod to get created and reach Running state")
        assert osd_pod.ocp.wait_for_resource(
            timeout=timeout,
            condition=constants.STATUS_RUNNING,
            selector=constants.OSD_APP_LABEL,
            resource_count=osd_pods_count,
        ), (f"Cluster recovery failed after {timeout} seconds. "
            f"Expected to have {osd_pods_count} OSD pods in status Running. Current OSD pods status: "
            f"{[osd_pod.ocp.get_resource(pod.get().get('metadata').get('name'), 'STATUS') for pod in get_osd_pods()]}"
            )

        # We need to silence the old osd crash warning due to BZ https://bugzilla.redhat.com/show_bug.cgi?id=1896810
        # This is a workaround - issue for tracking: https://github.com/red-hat-storage/ocs-ci/issues/3438
        if ocp_version >= 4.6:
            silence_osd_crash = cluster.wait_for_silence_ceph_osd_crash_warning(
                osd_pod_name)
            if not silence_osd_crash:
                logger.info("Didn't find ceph osd crash warning")

        # Validate cluster is still functional
        self.sanity_helpers.health_check(tries=100)
        self.sanity_helpers.create_resources(pvc_factory, pod_factory)
Beispiel #26
0
class Cosbench(object):
    """
    Cosbench S3 benchmark tool

    """
    def __init__(self):
        """
        Initializer function

        """
        self.ns_obj = OCP(kind="namespace")
        self.namespace = constants.COSBENCH_PROJECT
        self.configmap_obj = OCP(namespace=self.namespace,
                                 kind=constants.CONFIGMAP)
        self.ocp_obj = OCP(namespace=self.namespace)
        self.cosbench_config = None
        self.cosbench_pod = None
        self.cosbench_dir = mkdtemp(prefix="cosbench-tool-")
        self.xml_file = ""
        self.workload_id = ""
        self.init_container = 1
        self.range_selector = "r"
        self.init_object = 1
        mcg_obj = MCG()
        self.access_key_id = mcg_obj.access_key_id
        self.access_key = mcg_obj.access_key
        self.endpoint = (
            "http://" +
            mcg_obj.s3_internal_endpoint.split("/")[2].split(":")[0])

    def setup_cosbench(self):
        """
        Sets up the Cosbench namespace, configmap and pod

        """
        # Create cosbench project
        self.ns_obj.new_project(project_name=self.namespace)

        # Create configmap
        config_data = templating.load_yaml(file=constants.COSBENCH_CONFIGMAP)
        cosbench_configmap_name = create_unique_resource_name(
            constants.COSBENCH, "configmap")
        config_data["metadata"]["name"] = cosbench_configmap_name
        config_data["metadata"]["namespace"] = self.namespace
        self.cosbench_config = OCS(**config_data)
        logger.info(
            f"Creating Cosbench configmap: {self.cosbench_config.name}")
        self.cosbench_config.create()
        self.configmap_obj.wait_for_resource(
            resource_name=self.cosbench_config.name,
            column="DATA",
            condition="4")

        # Create Cosbench pod
        cosbench_pod_data = templating.load_yaml(file=constants.COSBENCH_POD)
        cosbench_pod_data["spec"]["containers"][0]["envFrom"][0][
            "configMapRef"]["name"] = self.cosbench_config.name
        cosbench_pod_name = create_unique_resource_name(
            constants.COSBENCH, "pod")
        cosbench_pod_data["metadata"]["name"] = cosbench_pod_name
        cosbench_pod_data["metadata"]["namespace"] = self.namespace
        self.cosbench_pod = OCS(**cosbench_pod_data)
        logger.info(f"Creating Cosbench pod: {self.cosbench_pod.name}")
        self.cosbench_pod.create()
        helpers.wait_for_resource_state(resource=self.cosbench_pod,
                                        state=constants.STATUS_RUNNING,
                                        timeout=300)

    def _apply_mcg_auth(self, xml_root):
        """
        Applies MCG credentials

        Args:
            xml_root (Element): Root element of workload xml

        """
        xml_root[0].set(
            "config",
            f"accesskey={self.access_key_id};secretkey={self.access_key};"
            f"endpoint={self.endpoint};path_style_access=true",
        )

    def run_init_workload(
        self,
        prefix,
        containers,
        objects,
        start_container=None,
        start_object=None,
        size=64,
        size_unit="KB",
        sleep=15,
        timeout=300,
        validate=True,
    ):
        """
        Creates specific containers and objects in bulk

        Args:
            prefix (str): Prefix of bucket name.
            containers (int): Number of containers/buckets to be created.
            objects (int): Number of objects to be created on each bucket.
            start_container (int): Start of containers. Default: 1.
            start_object (int): Start of objects. Default: 1.
            size (int): Size of each objects.
            size_unit (str): Object size unit (B/KB/MB/GB)
            sleep (int): Sleep in seconds.
            timeout (int): Timeout in seconds.
            validate (bool): Validates whether the init and prepare stages are completed.

        Returns:
            Tuple[str, str]: Workload id and name (returned only when validate is False)

        """
        init_template = """
        <workload name="Fill" description="Init and prepare operation">
        <storage type="s3" config="" />
          <workflow>
            <workstage name="init-containers">
              <work type="init" workers="1" config="" />
            </workstage>
            <workstage name="prepare-objects">
              <work type="prepare" workers="16" config="" />
            </workstage>
          </workflow>
        </workload>
        """
        xml_root, xml_tree = self._create_element_tree(template=init_template)
        workload_name = xml_root.get("name")
        self._apply_mcg_auth(xml_root)
        self.init_container = (start_container
                               if start_container else self.init_container)
        self.init_object = start_object if start_object else self.init_object
        init_container_config = self.generate_container_stage_config(
            self.range_selector,
            self.init_container,
            containers,
        )
        init_config = self.generate_stage_config(
            self.range_selector,
            self.init_container,
            containers,
            self.init_object,
            objects,
        )
        for stage in xml_root.iter("work"):
            if stage.get("type") == "init":
                stage.set("config",
                          f"cprefix={prefix};{init_container_config}")
            elif stage.get("type") == "prepare":
                stage.set(
                    "config",
                    f"cprefix={prefix};{init_config};sizes=c({str(size)}){size_unit}",
                )
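        # A worked example of the resulting config strings (hypothetical
        # values, assuming generate_container_stage_config() and
        # generate_stage_config(), which are not shown here, render the usual
        # Cosbench range expressions): with prefix="cosbench", containers=5,
        # objects=100 and the default 64 KB size, the init stage config would
        # be "cprefix=cosbench;containers=r(1,5)" and the prepare stage config
        # "cprefix=cosbench;containers=r(1,5);objects=r(1,100);sizes=c(64)KB".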
        self._create_tmp_xml(xml_tree=xml_tree, xml_file_prefix=workload_name)
        self.submit_workload(workload_path=self.xml_file)
        self.wait_for_workload(workload_id=self.workload_id,
                               sleep=sleep,
                               timeout=timeout)
        if validate:
            self.validate_workload(workload_id=self.workload_id,
                                   workload_name=workload_name)
        else:
            return self.workload_id, workload_name

    def run_cleanup_workload(
        self,
        prefix,
        containers,
        objects,
        start_container=None,
        start_object=None,
        sleep=15,
        timeout=300,
        validate=True,
    ):
        """
        Deletes specific objects and containers in bulk.

        Args:
            prefix (str): Prefix of bucket name.
            containers (int): Number of containers/buckets to be created.
            objects (int): Number of objects to be created on each bucket.
            start_container (int): Start of containers. Default: 1.
            start_object (int): Start of objects. Default: 1.
            sleep (int): Sleep in seconds.
            timeout (int): Timeout in seconds.
            validate (bool): Validates whether cleanup and dispose is completed.

        Returns:
            Tuple[str, str]: Workload ID and name of the workload

        """
        cleanup_template = """
        <workload name="Cleanup" description="Cleanup and Dispose">
          <storage type="s3" config="" />
          <workflow>
            <workstage name="cleanup-objects">
              <work type="cleanup" workers="4" config="" />
            </workstage>
            <workstage name="dispose-containers">
              <work type="dispose" workers="1" config="" />
            </workstage>
          </workflow>
        </workload>
        """
        xml_root, xml_tree = self._create_element_tree(
            template=cleanup_template)
        workload_name = xml_root.get("name")
        self._apply_mcg_auth(xml_root)
        self.init_container = (start_container
                               if start_container else self.init_container)
        self.init_object = start_object if start_object else self.init_object
        cleanup_config = self.generate_stage_config(
            self.range_selector,
            self.init_container,
            containers,
            self.init_object,
            objects,
        )
        for stage in xml_root.iter("work"):
            if stage.get("type") == "cleanup":
                stage.set(
                    "config",
                    f"cprefix={prefix};{cleanuo_config}",
                )
            elif stage.get("type") == "dispose":
                stage.set("config", f"cprefix={prefix};{cleanuo_config}")

        self._create_tmp_xml(xml_tree=xml_tree, xml_file_prefix=workload_name)
        self.submit_workload(workload_path=self.xml_file)
        self.wait_for_workload(workload_id=self.workload_id,
                               sleep=sleep,
                               timeout=timeout)
        if validate:
            self.validate_workload(workload_id=self.workload_id,
                                   workload_name=workload_name)
        else:
            return self.workload_id, workload_name

    def run_main_workload(
        self,
        operation_type,
        prefix,
        containers,
        objects,
        workers=4,
        selector="s",
        start_container=None,
        start_object=None,
        size=64,
        size_unit="KB",
        sleep=15,
        timeout=300,
        extend_objects=None,
        validate=True,
        result=True,
    ):
        """
        Creates and runs main Cosbench workload.

        Args:
            operation_type (dict): Cosbench operation and its ratio.
                                   Operation (str): Supported ops are read, write, list and delete.
                                   Ratio (int): Percentage of each operation. Should add up to 100.
            workers (int): Number of users to perform operations.
            containers (int): Number of containers/buckets to be created.
            objects (int): Number of objects to be created on each bucket.
            selector (str): The way object is accessed/selected. u=uniform, r=range, s=sequential.
            prefix (str): Prefix of bucket name.
            start_container (int): Start of containers. Default: 1.
            start_object (int): Start of objects. Default: 1.
            size (int): Size of each object.
            size_unit (str): Object size unit (B/KB/MB/GB)
            sleep (int): Sleep in seconds
            timeout (int): Timeout in seconds
            validate (bool): Validates whether each stage is completed
            extend_objects (int): Extends the total number of objects to prevent overlap.
                                  Use only for Write and Delete operations.
            result (bool): Get performance results when running workload is completed.

        Returns:
            Tuple[str, str]: Workload ID and name of the workload

        """
        main_template = """
        <workload name="workload_name" description="Main workload">
          <storage type="s3" config="" />
          <workflow>
            <workstage name="Main">
              <work name="work_name" workers="4" division="object" runtime="60">
              </work>
            </workstage>
          </workflow>
        </workload>
        """
        xml_root, xml_tree = self._create_element_tree(template=main_template)
        workload_name = xml_root.get("name")
        self._apply_mcg_auth(xml_root)
        start_container = start_container if start_container else self.init_container
        start_object = start_object if start_object else self.init_object
        for stage in xml_root.iter("work"):
            stage.set("workers", f"{workers}")
            for operation, ratio in operation_type.items():
                if operation == "write" or "delete":
                    if extend_objects:
                        start_object = objects + 1
                        stage_config = self.generate_stage_config(
                            selector,
                            start_container,
                            containers,
                            start_object,
                            extend_objects,
                        )
                        attributes = {
                            "type":
                            f"{operation}",
                            "ratio":
                            f"{ratio}",
                            "config":
                            f"cprefix={prefix};{stage_config};sizes=c({str(size)}){size_unit}",
                        }
                        ElementTree.SubElement(stage, "operation", attributes)
                    else:
                        stage_config = self.generate_stage_config(
                            selector,
                            start_container,
                            containers,
                            start_object,
                            objects,
                        )

                        attributes = {
                            "type":
                            f"{operation}",
                            "ratio":
                            f"{ratio}",
                            "config":
                            f"cprefix={prefix};{stage_config};sizes=c({str(size)}){size_unit}",
                        }
                        ElementTree.SubElement(stage, "operation", attributes)
                else:
                    stage_config = self.generate_stage_config(
                        selector,
                        start_container,
                        containers,
                        start_object,
                        objects,
                    )
                    attributes = {
                        "type": f"{operation}",
                        "ratio": f"{ratio}",
                        "config": f"cprefix={prefix};{stage_config}",
                    }
                    ElementTree.SubElement(stage, "operation", attributes)

        self._create_tmp_xml(xml_tree=xml_tree, xml_file_prefix=workload_name)
        self.submit_workload(workload_path=self.xml_file)
        self.wait_for_workload(workload_id=self.workload_id,
                               sleep=sleep,
                               timeout=timeout)
        if validate:
            self.validate_workload(workload_id=self.workload_id,
                                   workload_name=workload_name)
        else:
            return self.workload_id, workload_name

        if result:
            throughput, bandwidth = self.get_performance_result(
                workload_id=self.workload_id,
                workload_name=workload_name,
                size=size,
            )
            return throughput, bandwidth
        else:
            return self.workload_id, workload_name

    @staticmethod
    def generate_stage_config(selector, start_container, end_container,
                              start_objects, end_object):
        """
        Generates config which is used in stage creation

        Args:
            selector (str): The way object is accessed/selected. u=uniform, r=range, s=sequential.
            start_container (int): Start of containers
            end_container (int): End of containers
            start_objects (int): Start of objects
            end_object (int): End of objects

        Returns:
            (str): Container and object configuration

        """
        xml_config = (
            f"containers={selector}({str(start_container)},{str(end_container)});"
            f"objects={selector}({str(start_objects)},{str(end_object)})")
        return xml_config

    @staticmethod
    def generate_container_stage_config(selector, start_container,
                                        end_container):
        """
        Generates container config which creates buckets in bulk

        Args:
            selector (str): The way object is accessed/selected. u=uniform, r=range, s=sequential.
            start_container (int): Start of containers
            end_container (int): End of containers

        Returns:
            (str): Container and object configuration

        """
        container_config = (
            f"containers={selector}({str(start_container)},{str(end_container)});"
        )
        return container_config
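
    # Illustrative output of the two helpers above: with the sequential selector
    # "s", start_container=1, end_container=10, start_objects=1 and
    # end_object=100, generate_stage_config returns
    #   "containers=s(1,10);objects=s(1,100)"
    # and generate_container_stage_config returns
    #   "containers=s(1,10);"
    # These fragments are later prefixed with "cprefix=<bucket-prefix>;" and,
    # for write-like stages, suffixed with "sizes=c(<size>)<unit>".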

    def _create_tmp_xml(self, xml_tree, xml_file_prefix):
        """
        Creates a xml file and writes the workload

        Args:
            xml_file_prefix (str): Prefix of xml file
            xml_tree (Element): Element tree

        """
        self.xml_file = NamedTemporaryFile(
            dir=self.cosbench_dir,
            prefix=f"{xml_file_prefix}",
            suffix=".xml",
            delete=False,
        ).name
        logger.info(self.xml_file)
        xml_tree.write(self.xml_file)

    @staticmethod
    def _create_element_tree(template):
        """
        Creates element tree and root element of xml

        Args:
            template (str): Template of Cosbench workload

        Returns:
            Tuple[Element, ElementTree]: Root element and element tree of xml

        """
        xml_root = ElementTree.fromstring(text=template)
        xml_tree = ElementTree.ElementTree(element=xml_root)
        return xml_root, xml_tree
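
        # A minimal round-trip sketch for this helper (the template below is
        # illustrative, not one of the real workload templates):
        #   root, tree = self._create_element_tree(
        #       '<workload name="demo"><workflow/></workload>')
        #   root.get("name")      # -> "demo"
        #   tree.write("demo.xml")  # same write that _create_tmp_xml performs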

    def _copy_workload(self, workload_path):
        """
        Copies workload xml to Cosbench pod

        Args:
            workload_path (str): Absolute path of xml to copy

        """
        self.ocp_obj.exec_oc_cmd(
            command=f"cp {workload_path} {self.cosbench_pod.name}:/cos",
            out_yaml_format=False,
            timeout=180,
        )

    def submit_workload(self, workload_path):
        """
        Submits Cosbench xml to initiate workload

        Args:
            workload_path (str): Absolute path of xml to submit

        """
        self._copy_workload(workload_path=workload_path)
        workload = os.path.split(workload_path)[1]
        self._cosbench_cli(workload)

    @retry(AttributeError, tries=15, delay=5, backoff=1)
    def _cosbench_cli(self, workload):
        """
        Runs Cosbench cli to initiate workload

        Args:
            workload (str): Workload file

        """
        submit_key = "Accepted with ID"
        cobench_pod_obj = get_pod_obj(name=self.cosbench_pod.name,
                                      namespace=self.namespace)
        submit = cobench_pod_obj.exec_cmd_on_pod(
            command=f"/cos/cli.sh submit /cos/{workload}",
            out_yaml_format=True,
            timeout=180,
        )
        if submit_key in submit.keys():
            self.workload_id = submit[submit_key]
        else:
            assert f"Failed to submit the workload, ID not found. stdout: {submit}"

    def wait_for_workload(self, workload_id, sleep=1, timeout=60):
        """
        Waits for the cosbench workload to complete

        Args:
            workload_id (str): ID of cosbench workload
            sleep (int): Sleep in seconds between log polls
            timeout (int): Timeout in seconds to wait for the workload to complete

        Returns:
            bool: Whether cosbench workload processed successfully

        """
        logger.info(f"Waiting for workload {workload_id} to be processed")
        pattern = f"sucessfully processed workload {workload_id}"
        try:
            for ret in TimeoutSampler(
                    timeout=timeout,
                    sleep=sleep,
                    func=get_pod_logs,
                    pod_name=self.cosbench_pod.name,
                    namespace=self.namespace,
            ):
                if re.search(pattern=pattern, string=ret):
                    break
            logger.info(
                f"Verified: Workload {workload_id} processed successfully")
            return True
        except TimeoutExpiredError:
            logger.error(
                f"Workload {workload_id} did not complete. Dumping cosbench pod log"
            )
            # Log cosbench pod for debugging purpose
            cosbench_log = get_pod_logs(pod_name=self.cosbench_pod.name,
                                        namespace=self.namespace)
            logger.debug(cosbench_log)
            return False

    def validate_workload(self, workload_id, workload_name):
        """
        Validates each stage of cosbench workload

        Args:
            workload_id (str): ID of cosbench workload
            workload_name (str): Name of the workload

        Raises:
            UnexpectedBehaviour: When a stage did not complete or the workload csv is incorrect/malformed.

        """
        workload_csv = self.get_result_csv(workload_id=workload_id,
                                           workload_name=workload_name)
        with open(workload_csv, "r") as file:
            reader = csv.reader(file)
            header = next(reader)
            if header is not None:
                # Iterate over each row after the header
                logger.info(
                    f"Verifying whether each stage of workload {workload_id} completed"
                )
                for row in reader:
                    # Column 16 of the workload csv holds the stage status
                    if row[16] == "completed":
                        logger.info(f"Stage {row[0]} completed successfully")
                    else:
                        raise UnexpectedBehaviour(
                            f"Failed: Stage {row[0]} did not complete. Status {row[16]}"
                        )
            else:
                raise UnexpectedBehaviour(
                    f"Workload csv is incorrect/malformed. Dumping csv {reader}"
                )

    def get_result_csv(self, workload_id, workload_name):
        """
        Gets cosbench workload result csv

        Args:
            workload_id (str): ID of cosbench workload
            workload_name (str): Name of the workload

        Returns:
            str: Absolute path of the result csv

        """
        archive_file = f"{workload_id}-{workload_name}"
        cmd = (
            f"cp {self.cosbench_pod.name}:/cos/archive/{archive_file}/{archive_file}.csv "
            f"{self.cosbench_dir}/{archive_file}.csv ")
        self.ocp_obj.exec_oc_cmd(
            command=cmd,
            out_yaml_format=False,
            timeout=300,
        )
        return f"{self.cosbench_dir}/{archive_file}.csv"

    def cleanup(self):
        """
        Cosbench cleanup

        """
        switch_to_project(constants.COSBENCH_PROJECT)
        logger.info("Deleting Cosbench pod, configmap and namespace")
        self.cosbench_pod.delete()
        self.cosbench_config.delete()
        self.ns_obj.delete_project(self.namespace)
        self.ns_obj.wait_for_delete(resource_name=self.namespace, timeout=90)

    def get_performance_result(self, workload_name, workload_id, size):
        """
        Collects throughput and bandwidth data from the workload result csv
        and writes them to csv files under the cosbench directory.

        Args:
            workload_name (str): Name of the workload
            workload_id (str): ID of cosbench workload
            size (int): Object size used in the workload

        Returns:
            Tuple[dict, dict]: Throughput and bandwidth data from the result csv

        """
        workload_file = self.get_result_csv(workload_id=workload_id,
                                            workload_name=workload_name)
        throughput_data = {}
        bandwidth_data = {}
        with open(workload_file, "r") as file:
            reader = csv.reader(file)
            header = next(reader)
            if header is not None:
                for row in reader:
                    throughput_data[row[1]] = row[13]
                    bandwidth_data[row[1]] = row[14]
            else:
                raise UnexpectedBehaviour(
                    f"Workload csv is incorrect/malformed. Dumping csv {reader}"
                )
        # Store throughput data on csv file
        log_path = f"{self.cosbench_dir}"
        with open(f"{log_path}/{workload_name}-{size}-throughput.csv",
                  "a") as fd:
            csv_obj = csv.writer(fd)
            for k, v in throughput_data.items():
                csv_obj.writerow([k, v])
        logger.info(
            f"Throughput data present in {log_path}/{workload_name}-{size}-throughput.csv"
        )

        # Store bandwidth data on csv file
        with open(f"{log_path}/{workload_name}-{size}-bandwidth.csv",
                  "a") as fd:
            csv_obj = csv.writer(fd)
            for k, v in bandwidth_data.items():
                csv_obj.writerow([k, v])
        logger.info(
            f"Bandwidth data present in {log_path}/{workload_name}-{size}-bandwidth.csv"
        )
        return throughput_data, bandwidth_data

    def cosbench_full(self):
        """
        Run full Cosbench workload
        """
        bucket_prefix = "bucket-"
        buckets = 10
        objects = 1000

        # Operations to perform and its ratio(%)
        operations = {"read": 50, "write": 50}

        # Deployment of cosbench
        self.setup_cosbench()

        # Create initial containers and objects
        self.run_init_workload(prefix=bucket_prefix,
                               containers=buckets,
                               objects=objects,
                               validate=True)
        # Start measuring time
        start_time = datetime.now()

        # Run main workload
        self.run_main_workload(
            operation_type=operations,
            prefix=bucket_prefix,
            containers=buckets,
            objects=objects,
            validate=True,
            timeout=10800,
        )

        # Calculate the total run time of Cosbench workload
        end_time = datetime.now()
        diff_time = end_time - start_time
        logger.info(f"Cosbench workload completed after {diff_time}")

        # Dispose containers and objects
        self.run_cleanup_workload(prefix=bucket_prefix,
                                  containers=buckets,
                                  objects=objects,
                                  validate=True)
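
# For reference, a hedged sketch of the init workload XML that run_init_workload
# generates for the defaults used above (prefix "bucket-", 10 buckets, 1000
# objects), assuming the instance's range_selector is "r" and a 64 KB object
# size; the storage config injected by _apply_mcg_auth is elided:
#
#   <workload name="Fill" description="Init and prepare operation">
#     <storage type="s3" config="..." />
#     <workflow>
#       <workstage name="init-containers">
#         <work type="init" workers="1"
#               config="cprefix=bucket-;containers=r(1,10);" />
#       </workstage>
#       <workstage name="prepare-objects">
#         <work type="prepare" workers="16"
#               config="cprefix=bucket-;containers=r(1,10);objects=r(1,1000);sizes=c(64)KB" />
#       </workstage>
#     </workflow>
#   </workload>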
Beispiel #27
0
class TestFIOBenchmark(E2ETest):
    """
    Run FIO perf test using ripsaw benchmark

    """
    def ripsaw_deploy(self, ripsaw):
        """
        Deploy the benchmark operator (formerly ripsaw) CRD

        Args:
            ripsaw (obj): benchmark operator object

        """
        log.info("Deploying benchmark operator (ripsaw)")
        ripsaw.apply_crd("resources/crds/" "ripsaw_v1alpha1_ripsaw_crd.yaml")

    def es_info_backup(self, elasticsearch):
        """
        Saving the Original elastic-search IP and PORT - if defined in yaml

        Args:
            elasticsearch (obj): elasticsearch object

        """

        # for development mode use the Dev ES server
        if dev_mode:
            if "elasticsearch" in self.fio_cr["spec"]:
                self.fio_cr["spec"]["elasticsearch"] = {
                    "server":
                    defaults.ELASTICSEARCH_DEV_IP,
                    "port":
                    defaults.ELASTICSEARCE_PORT,
                    "url":
                    f"http://{defaults.ELASTICSEARCH_DEV_IP}:{defaults.ELASTICSEARCE_PORT}",
                }

        if "elasticsearch" in self.fio_cr["spec"]:
            self.fio_cr["spec"]["elasticsearch"]["url"] = (
                f"http://{self.fio_cr['spec']['elasticsearch']['server']}:"
                f"{self.fio_cr['spec']['elasticsearch']['port']}")
            self.backup_es = self.fio_cr["spec"]["elasticsearch"]
            log.info(
                f"Creating object for the Main ES server on {self.backup_es['url']}"
            )
            self.main_es = Elasticsearch([self.backup_es["url"]],
                                         verify_certs=True)
            if not self.main_es.ping():
                log.warning("Cannot connect to Main elasticsearch server")
                self.main_es = None

        else:
            log.warning(
                "Elastic Search information does not exists in YAML file")
            self.fio_cr["spec"]["elasticsearch"] = {}

        # Use the internal define elastic-search server in the test - if exist
        if elasticsearch:
            self.fio_cr["spec"]["elasticsearch"] = {
                "server":
                elasticsearch.get_ip(),
                "port":
                elasticsearch.get_port(),
                "url":
                f"http://{elasticsearch.get_ip()}:{elasticsearch.get_port()}",
            }

    def setting_storage_usage(self):
        """
        Getting the storage capacity, calculating the usage of the storage and
        setting the workload CR file parameters.

        """

        ceph_cluster = CephCluster()
        ceph_capacity = ceph_cluster.get_ceph_capacity()
        log.info(f"Total storage capacity is {ceph_capacity} GiB")
        self.total_data_set = int(ceph_capacity * 0.4)
        self.filesize = int(
            self.fio_cr["spec"]["workload"]["args"]["filesize"].replace(
                "GiB", ""))
        # To make sure the number of App pods will not be more than 50 in case
        # of a large data set, change the size of the file each pod will work on
        if self.total_data_set > 500:
            self.filesize = int(ceph_capacity * 0.008)
            self.fio_cr["spec"]["workload"]["args"][
                "filesize"] = f"{self.filesize}GiB"
            # make sure that the storage size is larger than the file size
            self.fio_cr["spec"]["workload"]["args"][
                "storagesize"] = f"{int(self.filesize * 1.2)}Gi"
        self.fio_cr["spec"]["workload"]["args"]["servers"] = int(
            self.total_data_set / self.filesize)
        log.info(f"Total Data set to work on is : {self.total_data_set} GiB")

    def get_env_info(self):
        """
        Getting the environment information and updating the workload CR if
        necessary.

        """
        self.environment = get_environment_info()
        if not self.environment["user"] == "":
            self.fio_cr["spec"]["test_user"] = self.environment["user"]
        self.fio_cr["spec"]["clustername"] = self.environment["clustername"]

        log.debug(f"Environment information is : {self.environment}")

    def setting_io_pattern(self, io_pattern):
        """
        Setting the test jobs according to the io pattern - random / sequential

        Args:
            io_pattern (str): the I/O pattern to run (random / sequential)

        """
        if io_pattern == "sequential":
            self.fio_cr["spec"]["workload"]["args"]["jobs"] = ["write", "read"]
            self.fio_cr["spec"]["workload"]["args"]["iodepth"] = 1
        if io_pattern == "random":
            self.fio_cr["spec"]["workload"]["args"]["jobs"] = [
                "randwrite", "randread"
            ]

    def deploy_and_wait_for_wl_to_start(self):
        """
        Deploy the workload and wait until it starts working

        Returns:
            obj : the FIO client pod object

        """
        log.info(f"The FIO CR file is {self.fio_cr}")
        self.fio_cr_obj = OCS(**self.fio_cr)
        self.fio_cr_obj.create()

        # Wait for fio client pod to be created
        for fio_pod in TimeoutSampler(900, 20, get_pod_name_by_pattern,
                                      "fio-client",
                                      constants.RIPSAW_NAMESPACE):
            try:
                if fio_pod[0] is not None:
                    fio_client_pod = fio_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        # Getting the start time of the test
        self.start_time = time.strftime("%Y-%m-%dT%H:%M:%SGMT", time.gmtime())
        return fio_client_pod

    def wait_for_wl_to_finish(self, fio_client_pod):
        """
        Waiting until the workload is finished

        Args:
            fio_client_pod (obj): the FIO client pod object

        Returns:
            str: the end time of the workload

        """
        if dev_mode:
            timeout = 3600
            sleeptime = 30
        else:
            timeout = 18000
            sleeptime = 300

        log.info("Waiting for fio_client to complete")
        pod_obj = OCP(kind="pod")
        pod_obj.wait_for_resource(
            condition="Completed",
            resource_name=fio_client_pod,
            timeout=timeout,
            sleep=sleeptime,
        )

        # Getting the end time of the test
        end_time = time.strftime("%Y-%m-%dT%H:%M:%SGMT", time.gmtime())

        output = run_cmd(f"oc logs {fio_client_pod}")
        log_file_name = f"{self.full_log_path}/test-pod.log"
        with open(log_file_name, "w") as f:
            f.write(output)
        log.info(f"The Test log is can be found at : {log_file_name}")

        if "Fio failed to execute" not in output:
            log.info("FIO has completed successfully")
        else:
            log.warning("FIO failed to complete")

        return end_time

    def init_full_results(self, full_results):
        """
        Initialize the full results object which will send to the ES server

        Args:
            full_results (obj): an empty FIOResultsAnalyse object

        Returns:
            FIOResultsAnalyse (obj): the input object fill with data

        """
        for key in self.environment:
            full_results.add_key(key, self.environment[key])

        # Setting the global parameters of the test
        full_results.add_key("dataset", f"{self.total_data_set}GiB")
        full_results.add_key(
            "file_size", self.fio_cr["spec"]["workload"]["args"]["filesize"])
        full_results.add_key(
            "servers", self.fio_cr["spec"]["workload"]["args"]["servers"])
        full_results.add_key(
            "samples", self.fio_cr["spec"]["workload"]["args"]["samples"])
        full_results.add_key("operations",
                             self.fio_cr["spec"]["workload"]["args"]["jobs"])
        full_results.add_key("block_sizes",
                             self.fio_cr["spec"]["workload"]["args"]["bs"])
        full_results.add_key(
            "io_depth", self.fio_cr["spec"]["workload"]["args"]["iodepth"])
        full_results.add_key(
            "jobs", self.fio_cr["spec"]["workload"]["args"]["numjobs"])
        full_results.add_key(
            "runtime",
            {
                "read":
                self.fio_cr["spec"]["workload"]["args"]["read_runtime"],
                "write":
                self.fio_cr["spec"]["workload"]["args"]["write_runtime"],
            },
        )
        full_results.add_key(
            "storageclass",
            self.fio_cr["spec"]["workload"]["args"]["storageclass"])
        full_results.add_key(
            "vol_size", self.fio_cr["spec"]["workload"]["args"]["storagesize"])
        return full_results

    def copy_es_data(self, elasticsearch):
        """
        Copy data from Internal ES (if exists) to the main ES

        Args:
            elasticsearch (obj): elasticsearch object (if exits)

        Returns:
            bool: True if data was copied to the main ES, False otherwise

        """
        if elasticsearch:
            log.info("Copy all data from Internal ES to Main ES")
            log.info("Dumping data from the Internal ES to tar ball file")
            elasticsearch.dumping_all_data(self.full_log_path)
            es_connection = self.backup_es
            es_connection["host"] = es_connection.pop("server")
            es_connection.pop("url")
            if elasticsearch_load(self.main_es, self.full_log_path):
                # Adding this sleep between the copy and the analyzing of the results
                # since sometimes the results of the read (just after write) are empty
                time.sleep(10)
                return True
            else:
                log.warning("Cannot upload data into the Main ES server")
                return False

    def cleanup(self):
        log.info("Deleting FIO benchmark")
        self.fio_cr_obj.delete()
        time.sleep(180)

        # Getting all PVCs created in the test (if left).
        NL = "\\n"  # NewLine character
        command = ["oc", "get", "pvc", "-n"]
        command.append(constants.RIPSAW_NAMESPACE)
        command.append("-o")
        command.append("template")
        command.append("--template")
        command.append("'{{range .items}}{{.metadata.name}}{{\"" + NL +
                       "\"}}{{end}}'")
        pvcs_list = run_command(command, out_format="list")
        log.info(f"list of all PVCs :{pvcs_list}")
        for pvc in pvcs_list:
            pvc = pvc.replace("'", "")
            run_command(f"oc -n {constants.RIPSAW_NAMESPACE} delete pvc {pvc}")

        # Getting all PVs created in the test (if left).
        command[2] = "pv"
        command[8] = (
            "'{{range .items}}{{.metadata.name}} {{.spec.claimRef.namespace}}{{\""
            + NL + "\"}}{{end}}'")
        command.remove("-n")
        command.remove(constants.RIPSAW_NAMESPACE)
        pvs_list = run_command(command, out_format="list")
        log.info(f"list of all PVs :{pvs_list}")

        for line in pvs_list:
            pv, ns = line.split(" ")
            pv = pv.replace("'", "")
            if ns == constants.RIPSAW_NAMESPACE:
                log.info(f"Going to delete {pv}")
                run_command(f"oc delete pv {pv}")

    @pytest.mark.parametrize(
        argnames=["interface", "io_pattern"],
        argvalues=[
            pytest.param(
                *[constants.CEPHBLOCKPOOL, "sequential"],
                marks=pytest.mark.polarion_id("OCS-844"),
            ),
            pytest.param(
                *[constants.CEPHFILESYSTEM, "sequential"],
                marks=pytest.mark.polarion_id("OCS-845"),
            ),
            pytest.param(
                *[constants.CEPHBLOCKPOOL, "random"],
                marks=pytest.mark.polarion_id("OCS-846"),
            ),
            pytest.param(
                *[constants.CEPHFILESYSTEM, "random"],
                marks=pytest.mark.polarion_id("OCS-847"),
            ),
        ],
    )
    def test_fio_workload_simple(self, ripsaw, es, interface, io_pattern):
        """
        This is a basic fio perf test - non-compressed volumes

        """

        self.full_log_path = get_full_test_logs_path(cname=self)
        self.full_log_path += f"-{interface}-{io_pattern}"
        log.info(f"Logs file path name is : {self.full_log_path}")

        self.ripsaw_deploy(ripsaw)

        if interface == "CephBlockPool":
            sc = constants.CEPHBLOCKPOOL_SC
        else:
            sc = constants.CEPHFILESYSTEM_SC

        # Create fio benchmark
        log.info("Create resource file for fio workload")
        self.fio_cr = templating.load_yaml(constants.FIO_CR_YAML)

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.es_info_backup(es)

        # Setting the data set to 40% of the total storage capacity
        self.setting_storage_usage()

        self.get_env_info()

        self.fio_cr["spec"]["workload"]["args"]["storageclass"] = sc
        self.setting_io_pattern(io_pattern)
        fio_client_pod = self.deploy_and_wait_for_wl_to_start()

        # Getting the UUID from inside the benchmark pod
        uuid = ripsaw.get_uuid(fio_client_pod)
        # Setting back the original elastic-search information
        self.fio_cr["spec"]["elasticsearch"] = self.backup_es

        # Initialize the results doc file.
        full_results = self.init_full_results(
            FIOResultsAnalyse(uuid, self.fio_cr, self.full_log_path,
                              self.main_es))

        # Setting the global parameters of the test
        full_results.add_key("io_pattern", io_pattern)

        end_time = self.wait_for_wl_to_finish(fio_client_pod)
        full_results.add_key("test_time", {
            "start": self.start_time,
            "end": end_time
        })

        # Clean up fio benchmark
        self.cleanup()

        log.debug(f"Full results is : {full_results.results}")
        self.copy_es_data(es)

        full_results.analyze_results()  # Analyze the results

        # Writing the analyzed test results to the Elastic-Search server
        if self.main_es is not None:
            full_results.es_write()
            full_results.codespeed_push()  # Push results to codespeed
            # Creating full link to the results on the ES server
            log.info(
                f"The Result can be found at ; {full_results.results_link()}")

    @skipif_ocs_version("<4.6")
    @pytest.mark.parametrize(
        argnames=["io_pattern", "bs", "cmp_ratio"],
        argvalues=[
            pytest.param(*["random", "1024KiB", 60]),
            pytest.param(*["random", "64KiB", 60]),
            pytest.param(*["random", "16KiB", 60]),
            pytest.param(*["sequential", "1024KiB", 60]),
            pytest.param(*["sequential", "64KiB", 60]),
            pytest.param(*["sequential", "16KiB", 60]),
        ],
    )
    def test_fio_compressed_workload(self, ripsaw, es, storageclass_factory,
                                     io_pattern, bs, cmp_ratio):
        """
        This is a basic fio perf test which run on compression enabled volume

        Args:
            io_pattern (str): the I/O pattern to do - random / sequential
            bs (str): block size to use in the test
            cmp_ratio (int): the expected compression ratio

        """

        self.full_log_path = get_full_test_logs_path(cname=self)
        self.full_log_path += f"-{io_pattern}-{bs}-{cmp_ratio}"
        log.info(f"Logs file path name is : {self.full_log_path}")

        self.ripsaw_deploy(ripsaw)

        log.info("Creating compressed pool & SC")
        sc_obj = storageclass_factory(
            interface=constants.CEPHBLOCKPOOL,
            new_rbd_pool=True,
            replica=3,
            compression="aggressive",
        )

        sc = sc_obj.name
        pool_name = run_cmd(
            f"oc get sc {sc} -o jsonpath={{'.parameters.pool'}}")
        # Create fio benchmark
        log.info("Create resource file for fio workload")
        self.fio_cr = templating.load_yaml(
            "ocs_ci/templates/workloads/fio/benchmark_fio_cmp.yaml")
        self.fio_cr["spec"]["workload"]["args"]["bs"] = [bs]
        self.fio_cr["spec"]["workload"]["args"]["prefill_bs"] = bs
        self.fio_cr["spec"]["workload"]["args"]["cmp_ratio"] = cmp_ratio

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.es_info_backup(es)

        # Setting the data set to 40% of the total storage capacity
        self.setting_storage_usage()

        self.get_env_info()

        self.fio_cr["spec"]["workload"]["args"]["storageclass"] = sc
        self.setting_io_pattern(io_pattern)
        fio_client_pod = self.deploy_and_wait_for_wl_to_start()

        # Getting the UUID from inside the benchmark pod
        uuid = ripsaw.get_uuid(fio_client_pod)
        # Setting back the original elastic-search information
        self.fio_cr["spec"]["elasticsearch"] = self.backup_es

        # Initialize the results doc file.
        full_results = self.init_full_results(
            FIOResultsAnalyse(uuid, self.fio_cr, self.full_log_path,
                              self.main_es))

        # Setting the global parameters of the test
        full_results.add_key("io_pattern", io_pattern)

        end_time = self.wait_for_wl_to_finish(fio_client_pod)
        full_results.add_key("test_time", {
            "start": self.start_time,
            "end": end_time
        })

        # Copy the internal ES data to the main ES (if configured)
        self.copy_es_data(es)

        log.info("verifying compression ratio")
        ratio = calculate_compression_ratio(pool_name)

        full_results.add_key("cmp_ratio", {
            "expected": cmp_ratio,
            "actual": ratio
        })
        full_results.analyze_results()  # Analyze the results
        # TODO: change the info message to Warning/Error after
        #  prefill at ripsaw will be fixed Ripsaw PR - #505
        if (cmp_ratio + 5) < ratio or ratio < (cmp_ratio - 5):
            log.info(f"The compression ratio is {ratio}% "
                     f"while the expected ratio is {cmp_ratio}%")
        else:
            log.info(f"The compression ratio is {ratio}%")

        # Writing the analyzed test results to the Elastic-Search server
        if self.main_es is not None:
            full_results.es_write()
            # Creating full link to the results on the ES server
            log.info(
                f"The Result can be found at : {full_results.results_link()}")

        self.cleanup()
        sc_obj.delete()
        sc_obj.ocp.wait_for_delete(resource_name=sc, timeout=300, sleep=5)
        log.debug(f"Full results is : {full_results.results}")
Beispiel #28
0
class AMQ(object):
    """
    Workload operation using AMQ
    """
    def __init__(self, **kwargs):
        """
        Initializer function

        Args:
            kwargs (dict):
                Following kwargs are valid
                namespace: namespace for the operator
                repo: AMQ repo where all necessary yaml file are there - a github link
                branch: branch to use from the repo
        """
        self.args = kwargs
        self.repo = self.args.get('repo', constants.KAFKA_OPERATOR)
        self.branch = self.args.get('branch', 'master')
        self.ocp = OCP()
        self.ns_obj = OCP(kind='namespace')
        self.pod_obj = OCP(kind='pod')
        self.kafka_obj = OCP(kind='Kafka')
        self.kafka_connect_obj = OCP(kind="KafkaConnect")
        self.kafka_bridge_obj = OCP(kind="KafkaBridge")
        self.kafka_topic_obj = OCP(kind="KafkaTopic")
        self.kafka_user_obj = OCP(kind="KafkaUser")
        self.amq_is_setup = False
        self.messaging = False
        self._clone_amq()

    def _clone_amq(self):
        """
        clone the amq repo
        """
        self.dir = tempfile.mkdtemp(prefix='amq_')
        try:
            log.info(f'cloning amq in {self.dir}')
            git_clone_cmd = f'git clone -b {self.branch} {self.repo} '
            run(git_clone_cmd, shell=True, cwd=self.dir, check=True)
            self.amq_dir = "strimzi-kafka-operator/install/cluster-operator/"
            self.amq_kafka_pers_yaml = "strimzi-kafka-operator/examples/kafka/kafka-persistent.yaml"
            self.amq_kafka_connect_yaml = "strimzi-kafka-operator/examples/connect/kafka-connect.yaml"
            self.amq_kafka_bridge_yaml = "strimzi-kafka-operator/examples/bridge/kafka-bridge.yaml"
            self.kafka_topic_yaml = "strimzi-kafka-operator/examples/topic/kafka-topic.yaml"
            self.kafka_user_yaml = "strimzi-kafka-operator/examples/user/kafka-user.yaml"
            self.hello_world_producer_yaml = constants.HELLO_WORLD_PRODUCER_YAML
            self.hello_world_consumer_yaml = constants.HELLO_WORLD_CONSUMER_YAML

        except (CommandFailed, CalledProcessError) as cf:
            log.error('Error during cloning of amq repository')
            raise cf

    def create_namespace(self, namespace):
        """
        create namespace for amq

        Args:
            namespace (str): Namespace for amq pods
        """
        self.ocp.new_project(namespace)

    def setup_amq_cluster_operator(self, namespace=constants.AMQ_NAMESPACE):
        """
        Function to setup amq-cluster-operator,
        the files are pulled from github
        and it will make sure the cluster-operator pod is running

        Args:
            namespace (str): Namespace for AMQ pods

        """

        # Namespace for amq
        try:
            self.create_namespace(namespace)
        except CommandFailed as ef:
            if f'project.project.openshift.io "{namespace}" already exists' not in str(
                    ef):
                raise ef

        # Create strimzi-cluster-operator pod
        run(
            f"for i in `(ls strimzi-kafka-operator/install/cluster-operator/)`;"
            f"do sed 's/{namespace}/myproject/g' strimzi-kafka-operator/install/cluster-operator/$i;done",
            shell=True,
            check=True,
            cwd=self.dir)
        run(f'oc apply -f {self.amq_dir} -n {namespace}',
            shell=True,
            check=True,
            cwd=self.dir)
        time.sleep(10)

        #  Check strimzi-cluster-operator pod created
        if self.is_amq_pod_running(pod_pattern="cluster-operator",
                                   expected_pods=1):
            log.info("strimzi-cluster-operator pod is in running state")
        else:
            raise ResourceWrongStatusException(
                "strimzi-cluster-operator pod is not getting to running state")

    def is_amq_pod_running(self,
                           pod_pattern,
                           expected_pods,
                           namespace=constants.AMQ_NAMESPACE):
        """
        The function checks if provided pod_pattern finds a pod and if the status is running or not

        Args:
            pod_pattern (str): the pattern for pod
            expected_pods (int): Number of pods
            namespace (str): Namespace for amq pods

        Returns:
            bool: status of pod: True if found pod is running

        """

        _rc = True

        for pod in TimeoutSampler(300, 10, get_pod_name_by_pattern,
                                  pod_pattern, namespace):
            try:
                if pod is not None and len(pod) == expected_pods:
                    amq_pod = pod
                    break
            except IndexError as ie:
                log.error(" pod not ready yet")
                raise ie

        # checking pod status
        for pod in amq_pod:
            if (self.pod_obj.wait_for_resource(
                    condition='Running',
                    resource_name=pod,
                    timeout=1600,
                    sleep=30,
            )):
                log.info(f"{pod} pod is up and running")
            else:
                _rc = False
                log.error(f"{pod} pod is not running")

        return _rc

    def setup_amq_kafka_persistent(self, sc_name, size=100, replicas=3):
        """
        Function to setup amq-kafka-persistent, the file is pulled from github,
        it will create kind: Kafka and will make sure the pods are in running state

        Args:
            sc_name (str): Name of sc
            size (int): Size of the storage in Gi
            replicas (int): Number of kafka and zookeeper pods to be created

        return : kafka_persistent

        """
        try:
            kafka_persistent = templating.load_yaml(
                os.path.join(self.dir, self.amq_kafka_pers_yaml))
            kafka_persistent['spec']['kafka']['replicas'] = replicas
            kafka_persistent['spec']['kafka']['storage']['volumes'][0][
                'class'] = sc_name
            kafka_persistent['spec']['kafka']['storage']['volumes'][0][
                'size'] = f"{size}Gi"

            kafka_persistent['spec']['zookeeper']['replicas'] = replicas
            kafka_persistent['spec']['zookeeper']['storage']['class'] = sc_name
            kafka_persistent['spec']['zookeeper']['storage'][
                'size'] = f"{size}Gi"
            self.kafka_persistent = OCS(**kafka_persistent)
            self.kafka_persistent.create()

        except (CommandFailed, CalledProcessError) as cf:
            log.error('Failed during setup of AMQ Kafka-persistent')
            raise cf
        time.sleep(40)

        if self.is_amq_pod_running(
                pod_pattern="my-cluster-zookeeper",
                expected_pods=replicas) and self.is_amq_pod_running(
                    pod_pattern="my-cluster-kafka", expected_pods=replicas):
            return self.kafka_persistent
        else:
            raise ResourceWrongStatusException(
                "my-cluster-kafka and my-cluster-zookeeper "
                "Pod is not getting to running state")

    def setup_amq_kafka_connect(self):
        """
        The function is to setup amq-kafka-connect, the yaml file is pulled from github,
        it will create kind: KafkaConnect and will make sure the pod status is running

        Returns: kafka_connect object
        """
        try:
            kafka_connect = templating.load_yaml(
                os.path.join(self.dir, self.amq_kafka_connect_yaml))
            self.kafka_connect = OCS(**kafka_connect)
            self.kafka_connect.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error('Failed during setup of AMQ KafkaConnect')
            raise cf

        if self.is_amq_pod_running(pod_pattern="my-connect-cluster-connect",
                                   expected_pods=1):
            return self.kafka_connect
        else:
            raise ResourceWrongStatusException(
                "my-connect-cluster-connect pod is not getting to running state"
            )

    def setup_amq_kafka_bridge(self):
        """
        Function to setup amq-kafka-bridge, the file is pulled from github,
        it will create kind: KafkaBridge and will make sure the pod status is running

        Return: kafka_bridge object
        """
        try:
            kafka_bridge = templating.load_yaml(
                os.path.join(self.dir, self.amq_kafka_bridge_yaml))
            self.kafka_bridge = OCS(**kafka_bridge)
            self.kafka_bridge.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error('Failed during setup of AMQ KafkaBridge')
            raise cf
        # Making sure the kafka_bridge is running
        if self.is_amq_pod_running(pod_pattern="my-bridge-bridge",
                                   expected_pods=1):
            return self.kafka_bridge
        else:
            raise ResourceWrongStatusException(
                "kafka_bridge_pod pod is not getting to running state")

    def create_kafka_topic(self, name='my-topic', partitions=1, replicas=1):
        """
        Creates kafka topic

        Args:
            name (str): Name of the kafka topic
            partitions (int): Number of partitions
            replicas (int): Number of replicas

        Return: kafka_topic object
        """
        try:
            kafka_topic = templating.load_yaml(
                os.path.join(self.dir, self.kafka_topic_yaml))
            kafka_topic["metadata"]["name"] = name
            kafka_topic["spec"]["partitions"] = partitions
            kafka_topic["spec"]["replicas"] = replicas
            self.kafka_topic = OCS(**kafka_topic)
            self.kafka_topic.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error('Failed during creating of Kafka topic')
            raise cf

        # Making sure kafka topic created
        if self.kafka_topic_obj.get(resource_name=name):
            return self.kafka_topic
        else:
            raise ResourceWrongStatusException("kafka topic is not created")

    def create_kafka_user(self, name="my-user"):
        """
        Creates kafka user

        Args:
             name (str): Name of the kafka user

        Return: kafka_user object

        """
        try:
            kafka_user = templating.load_yaml(
                os.path.join(self.dir, self.kafka_user_yaml))
            kafka_user["metadata"]["name"] = name
            self.kafka_user = OCS(**kafka_user)
            self.kafka_user.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error('Failed during creating of Kafka user')
            raise cf

        # Making sure kafka user created
        if self.kafka_user_obj.get(resource_name=name):
            return self.kafka_user
        else:
            raise ResourceWrongStatusException("kafka user is not created")

    def create_producer_pod(self, num_of_pods=1, value='10000'):
        """
        Creates producer pods

        Args:
            num_of_pods (int): Number of producer pods to be created
            value (str): Number of the messages to be sent

        Returns: producer pod object

        """
        try:
            producer_pod = templating.load_yaml(
                constants.HELLO_WORLD_PRODUCER_YAML)
            producer_pod["spec"]["replicas"] = num_of_pods
            producer_pod["spec"]["template"]["spec"]["containers"][0]["env"][
                4]["value"] = value
            self.producer_pod = OCS(**producer_pod)
            self.producer_pod.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error('Failed during creation of producer pod')
            raise cf

        # Making sure the producer pod is running
        if self.is_amq_pod_running(pod_pattern="hello-world-producer",
                                   expected_pods=num_of_pods):
            return self.producer_pod
        else:
            raise ResourceWrongStatusException(
                "producer pod is not getting to running state")

    def create_consumer_pod(self, num_of_pods=1, value='10000'):
        """
        Creates consumer pods

        Args:
            num_of_pods (int): Number of consumer pods to be created
            value (str): Number of messages to be received

        Returns: consumer pod object

        """
        try:
            consumer_pod = templating.load_yaml(
                constants.HELLO_WORLD_CONSUMER_YAML)
            consumer_pod["spec"]["replicas"] = num_of_pods
            consumer_pod["spec"]["template"]["spec"]["containers"][0]["env"][
                4]["value"] = value
            self.consumer_pod = OCS(**consumer_pod)
            self.consumer_pod.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error('Failed during creation of consumer pod')
            raise cf

        # Making sure the consumer pod is running
        if self.is_amq_pod_running(pod_pattern="hello-world-consumer",
                                   expected_pods=num_of_pods):
            return self.consumer_pod
        else:
            raise ResourceWrongStatusException(
                "consumer pod is not getting to running state")

    def validate_msg(self,
                     pod,
                     namespace=constants.AMQ_NAMESPACE,
                     value='10000',
                     since_time=1800):
        """
        Validate if messages are sent or received

        Args:
            pod (str): Name of the pod
            namespace (str): Namespace of the pod
            value (str): Number of messages that were sent
            since_time (int): Number of seconds of pod logs to check for the messages

        Returns:
            bool : True if all messages are sent/received

        """
        cmd = f"oc logs -n {namespace} {pod} --since={since_time}s"
        msg = run_cmd(cmd)
        if msg.find(f"Hello world - {int(value) - 1} ") == -1:
            return False
        else:
            return True
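
        # Illustrative only: with value='10000' the pod log is expected to end
        # with a line similar to "Hello world - 9999 ", so this method simply
        # greps the recent pod log for that final message.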

    def validate_messages_are_produced(self,
                                       namespace=constants.AMQ_NAMESPACE,
                                       value='10000',
                                       since_time=1800):
        """
        Validates if all messages are sent in producer pod

        Args:
            namespace (str): Namespace of the pod
            value (str): Number of messages that are sent
            since_time (int): Number of seconds of pod logs to check for the sent messages

        Raises exception on failures

        """
        # ToDo: Support multiple topics and users
        producer_pod_objs = [
            get_pod_obj(pod) for pod in get_pod_name_by_pattern(
                'hello-world-produce', namespace)
        ]
        for pod in producer_pod_objs:
            try:
                # TimeoutSampler raises TimeoutExpiredError when the messages
                # are not validated within the sampling timeout
                for msg in TimeoutSampler(900, 30, self.validate_msg, pod.name,
                                          namespace, value, since_time):
                    if msg:
                        break
            except TimeoutExpiredError:
                log.error("Few messages are not sent")
                raise Exception(
                    "All messages are not sent from the producer pod")

    def validate_messages_are_consumed(self,
                                       namespace=constants.AMQ_NAMESPACE,
                                       value='10000',
                                       since_time=1800):
        """
        Validates if all messages are received in consumer pod

        Args:
            namespace (str): Namespace of the pod
            value (str): Number of messages that are received
            since_time (int): Number of seconds of pod logs to check for the received messages

        Raises exception on failures

        """
        # ToDo: Support multiple topics and users
        consumer_pod_objs = [
            get_pod_obj(pod) for pod in get_pod_name_by_pattern(
                'hello-world-consumer', namespace)
        ]
        for pod in consumer_pod_objs:
            try:
                for msg in TimeoutSampler(900, 30, self.validate_msg, pod.name,
                                          namespace, value, since_time):
                    if msg:
                        log.info(
                            "Consumer pod received all messages sent by producer")
                        break
            except TimeoutExpiredError:
                log.error("Few messages are not received")
                raise Exception(
                    "Consumer pod did not receive all messages sent by producer")

    def run_in_bg(self,
                  namespace=constants.AMQ_NAMESPACE,
                  value='10000',
                  since_time=1800):
        """
        Validate messages are produced and consumed in bg

        Args:
            namespace (str): Namespace of the pod
            value (str): Number of messages to be sent and received
            since_time (int): Number of seconds of pod logs to check for the sent and received messages

        """
        # Todo: Check for each messages sent and received
        log.info("Running open messages on pod in bg")
        threads = []

        thread1 = Thread(target=self.validate_messages_are_produced,
                         args=(namespace, value, since_time))
        thread1.start()
        time.sleep(10)
        threads.append(thread1)

        thread2 = Thread(target=self.validate_messages_are_consumed,
                         args=(namespace, value, since_time))
        thread2.start()
        time.sleep(10)
        threads.append(thread2)

        return threads
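
        # A minimal usage sketch (values are illustrative): the caller keeps the
        # returned threads and joins them once the scenario under test is done,
        # e.g.
        #   threads = amq.run_in_bg(value='10000')
        #   ...  # run the scenario under test
        #   for t in threads:
        #       t.join(timeout=1800)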

    # ToDo: Install helm and get kafka metrics

    def create_messaging_on_amq(self,
                                topic_name='my-topic',
                                user_name="my-user",
                                partitions=1,
                                replicas=1,
                                num_of_producer_pods=1,
                                num_of_consumer_pods=1,
                                value='10000'):
        """
        Creates workload using Open Messaging tool on amq cluster

        Args:
            topic_name (str): Name of the topic to be created
            user_name (str): Name of the user to be created
            partitions (int): Number of partitions of topic
            replicas (int): Number of replicas of topic
            num_of_producer_pods (int): Number of producer pods to be created
            num_of_consumer_pods (int): Number of consumer pods to be created
            value (str): Number of messages to be sent and received

        """
        self.create_kafka_topic(topic_name, partitions, replicas)
        self.create_kafka_user(user_name)
        self.create_producer_pod(num_of_producer_pods, value)
        self.create_consumer_pod(num_of_consumer_pods, value)
        self.messaging = True

    def setup_amq_cluster(self,
                          sc_name,
                          namespace=constants.AMQ_NAMESPACE,
                          size=100,
                          replicas=3):
        """
        Creates amq cluster with persistent storage.

        Args:
            sc_name (str): Name of sc
            namespace (str): Namespace for amq cluster
            size (int): Size of the storage
            replicas (int): Number of kafka and zookeeper pods to be created

        """
        self.setup_amq_cluster_operator(namespace)
        self.setup_amq_kafka_persistent(sc_name, size, replicas)
        self.setup_amq_kafka_connect()
        self.setup_amq_kafka_bridge()
        self.amq_is_setup = True
        return self
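    # End-to-end usage sketch (assumed caller code; the AMQ class name and the
    # storage class constant are taken from elsewhere in this repo and may
    # differ): set up the cluster, create the messaging workload, validate,
    # then clean up.
    #
    #   amq = AMQ()
    #   amq.setup_amq_cluster(sc_name=constants.DEFAULT_STORAGECLASS_RBD,
    #                         size=100, replicas=3)
    #   amq.create_messaging_on_amq(topic_name='my-topic', value='10000')
    #   amq.validate_messages_are_produced()
    #   amq.validate_messages_are_consumed()
    #   amq.cleanup()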

    def cleanup(self, namespace=constants.AMQ_NAMESPACE):
        """
        Clean up function.
        Deletion starts with the messaging resources, then the kafka
        persistent, connect and bridge objects and the amq cluster operator
        resources, and finally the created namespace is removed.

        Args:
            namespace (str): Created namespace for amq
        """
        if self.amq_is_setup:
            if self.messaging:
                self.consumer_pod.delete()
                self.producer_pod.delete()
                self.kafka_user.delete()
                self.kafka_topic.delete()
            self.kafka_persistent.delete()
            self.kafka_connect.delete()
            self.kafka_bridge.delete()
            run_cmd(f'oc delete -f {self.amq_dir}',
                    shell=True,
                    check=True,
                    cwd=self.dir)
        run_cmd(f'oc delete project {namespace}')

        # Reset namespace to default
        switch_to_default_rook_cluster_project()
        self.ns_obj.wait_for_delete(resource_name=namespace)
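    # Teardown sketch (assumed pytest wiring, not part of this class): callers
    # typically register cleanup() as a finalizer so the namespace is removed
    # even if the messaging validation raises.
    #
    #   @pytest.fixture()
    #   def amq_cluster(request):
    #       amq = AMQ()
    #       request.addfinalizer(amq.cleanup)
    #       amq.setup_amq_cluster(sc_name=constants.DEFAULT_STORAGECLASS_RBD)
    #       return amq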
Beispiel #29
0
class TestCouchbaseWorkload(E2ETest):
    """
    Main couchbase workload class
    """
    COUCHBASE_OPERATOR = 'couchbase-operator-namespace'
    WAIT_FOR_TIME = 600
    admission_parts = [
        constants.COUCHBASE_ADMISSION_SERVICE_ACCOUNT_YAML,
        constants.COUCHBASE_ADMISSION_CLUSTER_ROLE_YAML,
        constants.COUCHBASE_ADMISSION_CLUSTER_ROLE_BINDING_YAML,
        constants.COUCHBASE_ADMISSION_SECRET_YAML,
        constants.COUCHBASE_ADMISSION_DEPLOYMENT_YAML,
        constants.COUCHBASE_ADMISSION_SERVICE_YAML,
        constants.COUCHBASE_MUTATING_WEBHOOK_YAML,
        constants.COUCHBASE_VALIDATING_WEBHOOK_YAML
    ]
    pod_obj = OCP(kind='pod')
    couchbase_pod = OCP(kind='pod')
    secretsadder = OCP(kind='pod')
    admission_pod = []
    cb_worker = OCS()
    cb_examples = OCS()

    def add_serviceaccount_secret(self, acct_name, dockerstr):
        """
        Add secret for serviceaccount

        Args:
            acct_name (str): Name of the service account
            dockerstr (str): Docker secret

        """
        self.secretsadder.exec_oc_cmd(
            f"secrets add serviceaccount/{acct_name} secrets/{dockerstr} --for=pull"
        )

    def is_up_and_running(self, pod_name, ocp_value):
        """
        Test if the pod specified is up and running.

        Args:
            pod_name (str): Name of pod being checked.
            ocp_value (OCP): object used for running oc commands

        Returns:
            bool: True if pod is running, False otherwise

        """
        if not pod_name:
            return False
        pod_info = ocp_value.exec_oc_cmd(f"get pods {pod_name} -o json")
        if pod_info['status']['containerStatuses'][0]['ready']:
            if 'running' in pod_info['status']['containerStatuses'][0]['state']:
                return True
        return False
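    # Illustrative (abridged) pod JSON consulted above; in Kubernetes the
    # container 'state' is a dict keyed by exactly one of running/waiting/
    # terminated, which is why the 'running' membership test works:
    #
    #   "status": {
    #       "containerStatuses": [
    #           {"ready": true, "state": {"running": {"startedAt": "..."}}}
    #       ]
    #   }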

    def test_couchbase_workload_simple(self, pillowfight):
        """
        Deploy a Couchbase server and pillowfight workload using operator

        The couchbase workers do not come up unless there is an admission controller
        running.  The admission controller is started from the default project prior
        to bringing up the operator.  Secrets, rolebindings and serviceaccounts
        need to also be generated.

        Once the couchbase operator is running, we need to wait for the three
        worker pods to also be up.  Then a pillowfight task is started.

        After the pillowfight task has finished, the log is collected and
        analyzed.

        Raises:
            Exception: If pillowfight results indicate that a minimum performance
                level is not reached (1 second response time, less than 1000 ops
                per second)

        """
        # Create admission controller
        log.info("Create admission controller process for Couchbase")

        switch_to_project('default')
        self.up_adm_chk = OCP(namespace="default")
        self.up_check = OCP(namespace=self.COUCHBASE_OPERATOR)
        for adm_yaml in self.admission_parts:
            adm_data = templating.load_yaml(adm_yaml)
            adm_obj = OCS(**adm_data)
            adm_obj.create()

        # Wait for admission pod to be created
        for adm_pod in TimeoutSampler(
            self.WAIT_FOR_TIME,
            3,
            get_pod_name_by_pattern,
            'couchbase-operator-admission',
            'default'
        ):
            try:
                if self.is_up_and_running(adm_pod[0], self.up_adm_chk):
                    self.admission_pod = adm_pod[0]
                    break
            except IndexError:
                log.info("Admission pod is not ready yet")

        # Wait for admission pod to be running
        log.info("Waiting for admission pod to be running")
        self.pod_obj.wait_for_resource(
            condition='Running',
            resource_name=self.admission_pod,
            timeout=self.WAIT_FOR_TIME,
            sleep=10,
        )
        self.pod_obj.new_project(self.COUCHBASE_OPERATOR)
        couchbase_data = templating.load_yaml(
            constants.COUCHBASE_CRD_YAML
        )
        self.couchbase_obj = OCS(**couchbase_data)
        self.couchbase_obj.create()
        op_data = templating.load_yaml(constants.COUCHBASE_OPERATOR_ROLE)
        self.operator_role = OCS(**op_data)
        self.operator_role.create()
        self.serviceaccount = OCP(namespace=self.COUCHBASE_OPERATOR)
        self.serviceaccount.exec_oc_cmd(
            "create serviceaccount couchbase-operator"
        )

        dockercfgs = self.serviceaccount.exec_oc_cmd("get secrets")
        startloc = dockercfgs.find('couchbase-operator-dockercfg')
        newdockerstr = dockercfgs[startloc:]
        endloc = newdockerstr.find(' ')
        dockerstr = newdockerstr[:endloc]
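        # The slicing above pulls the first whitespace-delimited token starting
        # at 'couchbase-operator-dockercfg'. Given illustrative `oc get secrets`
        # output such as
        #   couchbase-operator-dockercfg-x7k2q   kubernetes.io/dockercfg   1   10s
        # dockerstr ends up as 'couchbase-operator-dockercfg-x7k2q'.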
        self.add_serviceaccount_secret("couchbase-operator", dockerstr)
        self.add_serviceaccount_secret("default", dockerstr)
        self.rolebinding = OCP(namespace=self.COUCHBASE_OPERATOR)
        rolebind_cmd = "".join([
            "create rolebinding couchbase-operator-rolebinding ",
            "--role couchbase-operator ",
            "--serviceaccount couchbase-operator-namespace:couchbase-operator"
        ])
        self.rolebinding.exec_oc_cmd(rolebind_cmd)
        dep_data = templating.load_yaml(constants.COUCHBASE_OPERATOR_DEPLOY)
        self.cb_deploy = OCS(**dep_data)
        self.cb_deploy.create()
        # Wait for couchbase operator pod to be running
        for couchbase_pod in TimeoutSampler(
            self.WAIT_FOR_TIME,
            3,
            get_pod_name_by_pattern,
            'couchbase-operator',
            self.COUCHBASE_OPERATOR
        ):
            try:
                if self.is_up_and_running(couchbase_pod[0], self.up_check):
                    break
            except IndexError:
                log.info("Couchbase operator is not up")
        cb_work = templating.load_yaml(constants.COUCHBASE_WORKER_SECRET)
        self.cb_worker = OCS(**cb_work)
        self.cb_worker.create()
        cb_example = templating.load_yaml(constants.COUCHBASE_WORKER_EXAMPLE)
        self.cb_examples = OCS(**cb_example)
        self.cb_examples.create()
        # Wait for last of three workers to be running.
        for cb_wrk_pod in TimeoutSampler(
            self.WAIT_FOR_TIME,
            3,
            get_pod_name_by_pattern,
            'cb-example-0002',
            self.COUCHBASE_OPERATOR
        ):
            try:
                if self.is_up_and_running(cb_wrk_pod[0], self.up_check):
                    # once last pod is up, make sure all are ready
                    counter = 0
                    for wpodn in range(0, 3):
                        cbw_pod = f"cb-example-{wpodn:04}"
                        if self.is_up_and_running(cbw_pod, self.up_check):
                            counter += 1
                    if counter == 3:
                        break
            except IndexError:
                log.info("Couchbase workers are not up")

        pillowfight.run_pillowfights()
        pillowfight.analyze_all()

    def teardown(self):
        """
        Delete objects created in roughly reverse order of how they were created.

        """
        self.cb_examples.delete()
        self.cb_worker.delete()
        self.cb_deploy.delete()
        self.pod_obj.exec_oc_cmd(
            command="delete rolebinding couchbase-operator-rolebinding"
        )
        self.pod_obj.exec_oc_cmd(
            command="delete serviceaccount couchbase-operator"
        )
        self.operator_role.delete()
        self.couchbase_obj.delete()
        switch_to_project('default')
        self.pod_obj.delete_project(self.COUCHBASE_OPERATOR)
        for adm_yaml in self.admission_parts:
            adm_data = templating.load_yaml(adm_yaml)
            adm_obj = OCS(**adm_data)
            adm_obj.delete()
        # Before the code below was added, the teardown task would sometimes
        # fail with the leftover objects because it would still see one of the
        # couchbase pods.
        for admin_pod in TimeoutSampler(
            self.WAIT_FOR_TIME,
            3,
            get_pod_name_by_pattern,
            'couchbase',
            'default'
        ):
            if admin_pod:
                continue
            else:
                break

    def test_pvc_snapshot_performance_multiple_files(self, file_size, files,
                                                     threads, interface):
        """
        Run the SmallFile workload and then take a snapshot.
        The test runs with ~1M files on the volume - the total data set is
        the same for all tests, ~30GiB - then takes a snapshot and measures
        the time it takes. The test runs 3 times to check consistency.

        Args:
            file_size (int): the size of the file to be create - in KiB
            files (int): number of files each thread will create
            threads (int): number of threads will be used in the workload
            interface (str): the volume interface that will be used
                             CephBlockPool / CephFileSystem

        Raises:
            TimeoutError: in case file creation takes too long
                          (more than 2 hours)

        """

        # Deploying elastic-search server in the cluster for use by the
        # SmallFiles workload, since it is mandatory for the workload.
        # This is deployed once for all test iterations and will be deleted
        # in the end of the test.
        self.es = ElasticSearch()

        # Loading the main template yaml file for the benchmark and update some
        # fields with new values
        sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        if interface == constants.CEPHBLOCKPOOL:
            storageclass = constants.DEFAULT_STORAGECLASS_RBD
        else:
            storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
        log.info(f"Using {storageclass} Storageclass")

        # Setting up the parameters for this test
        sf_data["spec"]["workload"]["args"]["samples"] = 1
        sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
        sf_data["spec"]["workload"]["args"]["file_size"] = file_size
        sf_data["spec"]["workload"]["args"]["files"] = files
        sf_data["spec"]["workload"]["args"]["threads"] = threads
        sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass
        sf_data["spec"]["elasticsearch"] = {
            "url": f"http://{self.es.get_ip()}:{self.es.get_port()}"
        }
        """
        Calculate the size of the volume to be tested. It should be at
        least twice the total size of the files, and at least 100Gi.

        Since the file_size is given in KiB and the vol_size needs to be
        in GiB, a unit conversion is needed.
        """
        total_files = int(files * threads)
        total_data = int(files * threads * file_size / constants.GB2KB)
        data_set = int(total_data * 3)  # calculate data with replica
        vol_size = data_set if data_set >= 100 else 100
        sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

        environment = get_environment_info()
        if environment["user"] != "":
            sf_data["spec"]["test_user"] = environment["user"]
        else:
            # since the full results object needs this parameter, initialize it from the CR file
            environment["user"] = sf_data["spec"]["test_user"]

        sf_data["spec"]["clustername"] = environment["clustername"]
        log.debug(f"The smallfile yaml file is {sf_data}")

        # Deploy the benchmark-operator, so we can use the SmallFiles workload
        # to fill up the volume with files, and switch to the benchmark-operator namespace.
        log.info("Deploy the benchmark-operator")
        self.deploy_benchmark_operator()
        switch_to_project(BMO_NAME)

        all_results = []

        self.results_path = get_full_test_logs_path(cname=self)
        log.info(f"Logs file path name is : {self.full_log_path}")

        # Produce ES report
        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        self.full_results = self.init_full_results(
            ResultsAnalyse(
                self.uuid,
                self.crd_data,
                self.full_log_path,
                "pvc_snapshot_perf_multiple_files",
            ))
        self.full_results.add_key("file_size_inKB", file_size)
        self.full_results.add_key("threads", threads)
        self.full_results.add_key("interface", interface)
        for test_num in range(self.tests_numbers):

            test_results = {"creation_time": None, "csi_creation_time": None}

            # deploy the smallfile workload
            log.info("Running SmallFile bench")
            sf_obj = OCS(**sf_data)
            sf_obj.create()

            # wait for benchmark pods to get created - takes a while
            for bench_pod in TimeoutSampler(
                    240,
                    10,
                    get_pod_name_by_pattern,
                    "smallfile-client",
                    BMO_NAME,
            ):
                try:
                    if bench_pod[0] is not None:
                        small_file_client_pod = bench_pod[0]
                        break
                except IndexError:
                    log.info("Bench pod not ready yet")

            bench_pod = OCP(kind="pod", namespace=BMO_NAME)
            log.info("Waiting for SmallFile benchmark to Run")
            assert bench_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=small_file_client_pod,
                sleep=30,
                timeout=600,
            )
            # Initialize the pvc_name variable so it will not be in loop scope only.
            pvc_name = ""
            for item in bench_pod.get()["items"]:
                if item.get("metadata").get("name") == small_file_client_pod:
                    for volume in item.get("spec").get("volumes"):
                        if "persistentVolumeClaim" in volume:
                            pvc_name = volume["persistentVolumeClaim"][
                                "claimName"]
                            break
            log.info(f"Benchmark PVC name is : {pvc_name}")
            # Creation of 1M files on CephFS can take a lot of time
            timeout = 7200
            while timeout >= 0:
                logs = bench_pod.get_logs(name=small_file_client_pod)
                if "RUN STATUS DONE" in logs:
                    break
                timeout -= 30
                if timeout == 0:
                    raise TimeoutError(
                        "Timed out waiting for benchmark to complete")
                time.sleep(30)
            log.info(f"Smallfile test ({test_num + 1}) finished.")

            # Taking snapshot of the PVC (which contain files)
            snap_name = pvc_name.replace("claim", "snapshot-")
            log.info(f"Taking snapshot of the PVC {pvc_name}")
            log.info(f"Snapshot name : {snap_name}")

            start_time = datetime.datetime.utcnow().strftime(
                "%Y-%m-%dT%H:%M:%SZ")

            test_results["creation_time"] = self.measure_create_snapshot_time(
                pvc_name=pvc_name,
                snap_name=snap_name,
                namespace=BMO_NAME,
                interface=interface,
                start_time=start_time,
            )
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} creation time is"
                f' {test_results["creation_time"]} seconds')

            test_results[
                "csi_creation_time"] = performance_lib.measure_csi_snapshot_creation_time(
                    interface=interface,
                    snapshot_id=self.snap_uid,
                    start_time=start_time)
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} csi creation time is"
                f' {test_results["csi_creation_time"]} seconds')

            all_results.append(test_results)

            # Delete the smallfile workload - which will delete also the PVC
            log.info("Deleting the smallfile workload")
            if sf_obj.delete(wait=True):
                log.info("The smallfile workload was deleted successfully")

            # Delete VolumeSnapshots
            log.info("Deleting the snapshots")
            if self.snap_obj.delete(wait=True):
                log.info("The snapshot deleted successfully")
            log.info("Verify (and wait if needed) that ceph health is OK")
            ceph_health_check(tries=45, delay=60)

            # Sleep for 1 Min. between test samples
            time.sleep(60)

        # Cleanup the elasticsearch instance.
        log.info("Deleting the elastic-search instance")
        self.es.cleanup()

        creation_times = [t["creation_time"] for t in all_results]
        avg_c_time = statistics.mean(creation_times)
        csi_creation_times = [t["csi_creation_time"] for t in all_results]
        avg_csi_c_time = statistics.mean(csi_creation_times)

        t_dataset = int(data_set / 3)

        log.info(f"Full test report for {interface}:")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All snapshot creation results are {creation_times} seconds")
        log.info(
            f"The average snapshot creation time is : {avg_c_time} seconds")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All snapshot csi creation results are {csi_creation_times}")
        log.info(
            f"The average csi snapshot creation time is : {avg_csi_c_time}")

        log.info(f"Number of Files on the volume : {total_files:,}, "
                 f"Total dataset : {t_dateset} GiB")

        self.full_results.add_key("avg_snapshot_creation_time_insecs",
                                  avg_c_time)
        self.full_results.all_results["total_files"] = total_files
        self.full_results.all_results["total_dataset"] = t_dateset
        self.full_results.all_results["creation_time"] = creation_times
        self.full_results.all_results["csi_creation_time"] = csi_creation_times

        # Write the test results into the ES server
        log.info("writing results to elastic search server")
        if self.full_results.es_write():
            res_link = self.full_results.results_link()
            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            # Create a text file with the results of all subtests
            self.write_result_to_file(res_link)