def verify_load_balancer_completed(core_api, stack_id, stack_config, role):
    """Verify that the load balancer for `role` is reachable.

    Checks that at least one load-balancer hostname exists and resolves via
    DNS; for roles that expose a management API (standalone, deployer,
    search head, cluster master) it additionally verifies that an API client
    can connect.

    Raises errors.RetryOperation whenever the load balancer is not ready yet,
    so the caller's retry machinery tries again later.
    """
    import socket  # hoisted out of the per-host loop; kept function-local
    hosts = services.get_load_balancer_hosts(core_api, stack_id, role,
                                             stack_config["namespace"])
    if not hosts:
        raise errors.RetryOperation("waiting for %s load balancer hostname" %
                                    role)
    for host in hosts:
        try:
            socket.gethostbyname(host)
        except socket.error:
            raise errors.RetryOperation(
                "Waiting for %s ingress connection (cannot resolve hostname)" %
                role)
    # Only management roles expose an API endpoint we can open a client to.
    if role in (services.standalone_role, services.deployer_role,
                services.search_head_role, services.cluster_master_role):
        try:
            instances.create_client(core_api, stack_id, stack_config, role)
        except ssl.SSLEOFError:
            raise errors.RetryOperation(
                "Waiting for %s ingress connection (SSL protocol error)" %
                role)
        except TimeoutError:
            raise errors.RetryOperation(
                "Waiting for %s ingress connection (timeout)" % role)
def push_deployer_bundle(core_api, stack_id, stack_config):
    """Push the configuration bundle from the deployer to the search heads.

    Retries (via errors.RetryOperation) while the search head hostname is not
    yet available or while a search head cluster restart is in progress,
    both before and after the push.
    """
    hostnames = services.get_load_balancer_hosts(
        core_api, stack_id, services.search_head_role,
        stack_config["namespace"])
    if not hostnames:
        raise errors.RetryOperation(
            "Waiting for hostname for search heads ...")
    target_hostname = hostnames[0]

    if is_sh_cluster_restart_in_progress(core_api, stack_id, stack_config):
        raise errors.RetryOperation(
            "wait for SH cluster restart process completed ...")

    deployer = instances.create_client(core_api, stack_id, stack_config,
                                       services.deployer_role)
    deployer.post(
        "apps/deploy",
        target="https://%s:8089" % (target_hostname),
        action="all",
        advertising="true",
        force="true",
    )
    logging.info("pushed SH deployer bundle")

    # The push itself may trigger a rolling restart; surface that to the
    # caller so it keeps retrying until the cluster settles.
    if is_sh_cluster_restart_in_progress(core_api, stack_id, stack_config):
        raise errors.RetryOperation(
            "wait for SH cluster restart process completed ...")
# Example 3
def wait_until_ready(splunk, kubernetes, stack_id, stack_config):
    """Raise RetryOperation until the indexer cluster reports Ready.

    Checks, in order: the custom resource exists, its status is populated,
    the ready replica count matches the configured indexer count, the
    cluster master phase is Ready, and finally the overall phase is Ready.
    """
    indexer_cluster = get(splunk, kubernetes, stack_id, stack_config)
    if not indexer_cluster:
        raise Exception("could not find indexer cluster")
    if "status" not in indexer_cluster:
        raise errors.RetryOperation("waiting for cluster master status")
    status = indexer_cluster["status"]
    expected_indexers = int(stack_config["indexer_count"])
    ready_replicas = status["readyReplicas"]
    if expected_indexers != ready_replicas:
        raise errors.RetryOperation(
            "waiting for target number of indexers (expected %s, got %s)" % (
                expected_indexers,
                ready_replicas,
            ))
    master_phase = status["clusterMasterPhase"]
    if master_phase != "Ready":
        raise errors.RetryOperation(
            "waiting for cluster master to become ready (currently it's in %s phase)"
            % (master_phase))
    cluster_phase = status["phase"]
    if cluster_phase != "Ready":
        raise errors.RetryOperation(
            "waiting for indexer cluster to become ready (currently it's in %s phase)"
            % (cluster_phase))
# Example 4
def wait_until_ready(splunk, kubernetes, stack_id, stack_config):
    """Raise RetryOperation until the license master reports the Ready phase."""
    license_master = get(splunk, kubernetes, stack_id, stack_config)
    if not license_master:
        raise Exception("could not find license master")
    if "status" not in license_master:
        raise errors.RetryOperation("waiting for license master status")
    current_phase = license_master["status"]["phase"]
    if current_phase != "Ready":
        raise errors.RetryOperation("waiting for license master to become ready (currently it's in %s phase)" % (
            current_phase
        ))
# Example 5
def wait_until_ready(splunk, kubernetes, stack_id, stack_config):
    """Raise RetryOperation until the standalone instance reports Ready."""
    standalone = get(splunk, kubernetes, stack_id, stack_config)
    if not standalone:
        raise Exception("could not find standalone")
    if "status" not in standalone:
        raise errors.RetryOperation("waiting for standalone status")
    current_phase = standalone["status"]["phase"]
    if current_phase != "Ready":
        raise errors.RetryOperation("waiting for standalone to become ready (currently it's in %s phase)" % (
            current_phase
        ))
def stop_cases(splunk, test_id, test):
    """Stop every test case that belongs to performance test `test_id`.

    Cases that are starting, running, or stopping are torn down via
    stop_case(); waiting and finished cases need no teardown. Each processed
    case is marked with "stopped": True in the KV store so re-runs skip it.

    Raises errors.RetryOperation on an unexpected case status so the
    operation is retried rather than silently skipping the case.
    """
    cases_collection = get_performance_test_cases_collection(splunk)
    cases = cases_collection.query(
        query=json.dumps({
            "test_id": test_id,
        }),
        sort="index:1",
    )
    for case in cases:
        case_id = case["_key"]
        status = case["status"]
        # Idempotency: skip cases already marked stopped by a previous run.
        if case.get("stopped") == True:
            continue
        if status in (CASE_WAITING, CASE_FINISHED):
            # Nothing was started (or it already finished) — no teardown.
            pass
        elif status in (CASE_STARTING, CASE_RUNNING, CASE_STOPPING):
            stop_case(splunk, test_id, case_id, case)
            logging.info("stopped test case %s" % case_id)
        else:
            logging.error(
                "stop_cases: unexpected status for test case %s: %s" %
                (case_id, status))
            raise errors.RetryOperation()
        case.update({"stopped": True})
        cases_collection.update(case_id, json.dumps(case))
# Example 7
def wait_until_ready(splunk, kubernetes, stack_id, stack_config):
    """Raise RetryOperation until the search head cluster reports Ready.

    Checks, in order: the custom resource exists, its status is populated,
    the ready replica count matches the configured search head count, the
    deployer phase is Ready, the captain is elected, and finally the overall
    phase is Ready.
    """
    search_head_cluster = get(splunk, kubernetes, stack_id, stack_config)
    if not search_head_cluster:
        raise Exception("could not find search head cluster")
    # The status block may not be populated immediately after creation;
    # retry instead of crashing with a KeyError (consistent with the other
    # wait_until_ready implementations in this module).
    if "status" not in search_head_cluster:
        raise errors.RetryOperation("waiting for search head cluster status")
    status = search_head_cluster["status"]
    # Example status document:
    # captain: ...
    # captainReady: true
    # deployerPhase: Ready
    # initialized: true
    # maintenanceMode: false
    # members:
    # - active_historical_search_count: 0
    #     active_realtime_search_count: 0
    #     adhoc_searchhead: false
    #     is_registered: true
    #     name: ...
    #     status: Up
    # minPeersJoined: true
    # phase: Ready
    # readyReplicas: 3
    # replicas: 3
    # selector: ...
    target_search_head_count = int(stack_config["search_head_count"])
    actual_ready_replicas = status["readyReplicas"]
    if target_search_head_count != actual_ready_replicas:
        raise errors.RetryOperation(
            "waiting for target number of search heads (expected %s, got %s)" %
            (
                target_search_head_count,
                actual_ready_replicas,
            ))
    deployer_phase = status["deployerPhase"]
    if deployer_phase != "Ready":
        raise errors.RetryOperation(
            "waiting for deployer to become ready (currently it's in %s phase)"
            % (deployer_phase))
    captain_ready = status["captainReady"]
    if not captain_ready:
        raise errors.RetryOperation(
            "search head cluster captain not yet ready")
    phase = status["phase"]
    if phase != "Ready":
        raise errors.RetryOperation(
            "waiting for search head cluster to become ready (currently it's in %s phase)"
            % (phase))
def _delete_test_deployments(apps_api, namespace, label_selector):
    """Delete every Kubernetes deployment in `namespace` matching `label_selector`."""
    deployments = apps_api.list_namespaced_deployment(
        namespace=namespace,
        label_selector=label_selector,
    ).items
    for deployment in deployments:
        apps_api.delete_namespaced_deployment(
            name=deployment.metadata.name,
            namespace=namespace,
        )
        logging.debug("deleted deployment %s" % deployment.metadata.name)


def stop_case(splunk, test_id, case_id, case):
    """Tear down a single test case: delete its stack, then its generators.

    If the stack is still being deleted (or a deletion was just issued),
    raises errors.RetryOperation so the caller retries until the stack is
    gone, after which the datagen/searchgen deployments are removed.
    """
    if "stack_id" not in case:
        # Nothing was ever provisioned for this case.
        return
    stack_id = case["stack_id"]
    result = splunk.get("saas/stack/%s" % stack_id)
    logging.debug("get stack result: %s" % result)
    response = json.loads(result.body.read())["entry"][0]["content"]
    logging.debug("get stack response: %s" % response)
    stack_status = response["status"]
    if stack_status == stacks.DELETING:
        raise errors.RetryOperation("still in status %s" % stacks.DELETING)
    elif stack_status == stacks.DELETED:
        pass
    else:
        # Stack still exists in some other state: issue deletion and retry
        # until it reaches DELETED.
        result = splunk.delete("saas/stack/%s" % stack_id)
        response = json.loads(result.body.read())["entry"][0]["content"]
        logging.debug("delete stack result: %s" % response)
        raise errors.RetryOperation("issued deletion of stack %s" % (stack_id))
    stack_config = stacks.get_stack_config(splunk, stack_id)
    kube_client = clusters.create_client(splunk, stack_config["cluster"])
    apps_api = kubernetes.AppsV1Api(kube_client)
    # Remove the load generators that were labeled for this test.
    _delete_test_deployments(apps_api, stack_config["namespace"],
                             "app=datagen,test=%s" % test_id)
    _delete_test_deployments(apps_api, stack_config["namespace"],
                             "app=searchgen,test=%s" % test_id)
def run_cases(splunk, test_id, test):
    """Advance every test case of performance test `test_id` through its
    lifecycle: WAITING -> STARTING -> RUNNING -> STOPPING -> FINISHED.

    Each state transition persists the updated case back to the KV-store
    collection and then raises errors.RetryOperation, so the surrounding
    retry machinery re-enters this function until all cases are FINISHED.
    """
    cases_collection = get_performance_test_cases_collection(splunk)
    cases = cases_collection.query(
        query=json.dumps({
            "test_id": test_id,
        }),
        sort="index:1",
    )
    for case in cases:
        case_id = case["_key"]
        status = case["status"]
        if status == CASE_FINISHED:
            continue
        if status == CASE_WAITING:
            # WAITING: create the stack for this case, record its id, and
            # retry until the stack finishes starting up.
            result = splunk.post(
                "saas/stacks", **{
                    "deployment_type": case["deployment_type"],
                    "indexer_count": case["indexer_count"],
                    "search_head_count": case["search_head_count"],
                    "cpu_per_instance": case["cpu_per_instance"],
                    "etc_storage_in_gb": case["etc_storage_in_gb"],
                    "other_var_storage_in_gb": case["other_var_storage_in_gb"],
                    "indexer_var_storage_in_gb":
                    case["indexer_var_storage_in_gb"],
                    "memory_per_instance": case["memory_per_instance"],
                    "title":
                    "Performance Test %s and Case %s" % (test_id, case_id),
                    "cluster": test["cluster"],
                })
            response = json.loads(result.body.read())["entry"][0]["content"]
            stack_id = response["stack_id"]
            logging.info("created stack %s for test case %s" %
                         (stack_id, case_id))
            case.update({
                "status": CASE_STARTING,
                "stack_id": stack_id,
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation(
                "waiting for stack %s in test case %s starting up ..." %
                (stack_id, case_id))
        elif status == CASE_STARTING:
            # STARTING: wait for the stack to be CREATED, then deploy the
            # data and search generators and move the case to RUNNING.
            stack_id = case["stack_id"]
            stack = splunk.get("saas/stack/%s" % stack_id)
            stack_status = json.loads(
                stack.body.read())["entry"][0]["content"]["status"]
            if stack_status == stacks.CREATING:
                raise errors.RetryOperation()
            if stack_status != stacks.CREATED:
                raise Exception("unexpected stack status: %s" % stack_status)
            logging.info("successfully created stack %s for case %s" %
                         (stack_id, case_id))
            stack_config = stacks.get_stack_config(splunk, stack_id)
            kube_client = clusters.create_client(splunk,
                                                 stack_config["cluster"])
            cluster_config = clusters.get_cluster(splunk, test["cluster"])
            # Parse "key=value,key=value" node selector into a dict so the
            # generator pods are scheduled onto the same nodes as the stack.
            node_selector_labels = cluster_config["node_selector"].split(",")
            node_selector_for_generators = {}
            for label in node_selector_labels:
                if label:
                    kv = label.split("=")
                    if len(kv) != 2:
                        raise errors.ApplicationError(
                            "invalid node selector format (%s)" %
                            cluster_config.node_selector)
                    node_selector_for_generators[kv[0]] = kv[1]
            apps_api = kubernetes.AppsV1Api(kube_client)
            core_api = kubernetes.CoreV1Api(kube_client)
            # Data generators send events to the standalone instance or to
            # the indexers, depending on deployment type.
            if stack_config["deployment_type"] == "standalone":
                indexer_hosts = services.get_load_balancer_hosts(
                    core_api, stack_id, services.standalone_role,
                    stack_config["namespace"])
            elif stack_config["deployment_type"] == "distributed":
                indexer_hosts = services.get_load_balancer_hosts(
                    core_api, stack_id, services.indexer_role,
                    stack_config["namespace"])
            else:
                raise Exception("unexpected deployment type: %s" %
                                stack_config["deployment_type"])
            # Derive how many data generator pods are needed from the target
            # daily ingest volume, capping each generator at ~100 KB/s.
            data_volume_in_gb_per_day = int(case["data_volume_in_gb_per_day"])
            logging.debug("data_volume_in_gb_per_day=%s" %
                          (data_volume_in_gb_per_day))
            data_volume_in_gb_per_second = data_volume_in_gb_per_day / 24 / 60 / 60
            logging.debug("data_volume_in_gb_per_second=%s" %
                          (data_volume_in_gb_per_second))
            data_volume_in_kb_per_second = data_volume_in_gb_per_second * 1024 * 1024
            logging.debug("data_volume_in_kb_per_second=%s" %
                          (data_volume_in_kb_per_second))
            max_kb_per_second_per_data_generator = 100
            logging.debug("max_kb_per_second_per_data_generator=%s" %
                          (max_kb_per_second_per_data_generator))
            number_of_data_generators = max(
                int(data_volume_in_kb_per_second /
                    max_kb_per_second_per_data_generator) + 1, 1)
            logging.debug("number_of_data_generators=%s" %
                          (number_of_data_generators))
            data_volume_in_kb_per_second_per_data_generator = data_volume_in_kb_per_second / \
                number_of_data_generators
            logging.debug(
                "data_volume_in_kb_per_second_per_data_generator=%s" %
                (data_volume_in_kb_per_second_per_data_generator))
            deployment_name = "datagen-%s" % (stack_id)
            # Idempotency: only create the datagen deployment if a previous
            # retry of this function has not already created it.
            try:
                apps_api.read_namespaced_deployment(
                    deployment_name, namespace=stack_config["namespace"])
                data_gen_deployment_already_exists = True
            except kubernetes.rest.ApiException as e:
                if e.status != 404:
                    raise
                data_gen_deployment_already_exists = False
            if not data_gen_deployment_already_exists:
                apps_api.create_namespaced_deployment(
                    namespace=stack_config["namespace"],
                    body=kubernetes.V1Deployment(
                        metadata=kubernetes.V1ObjectMeta(
                            name=deployment_name,
                            namespace=stack_config["namespace"],
                            labels={
                                "app": "datagen",
                                "test": test_id,
                                "case": case_id,
                            },
                        ),
                        spec=kubernetes.V1DeploymentSpec(
                            replicas=number_of_data_generators,
                            selector=kubernetes.V1LabelSelector(
                                match_labels={
                                    "name": "datagen-%s" % (stack_id),
                                }),
                            template=kubernetes.V1PodTemplateSpec(
                                metadata=kubernetes.V1ObjectMeta(labels={
                                    "name":
                                    "datagen-%s" % (stack_id),
                                    "app":
                                    "datagen",
                                    "test":
                                    test_id,
                                    "case":
                                    case_id,
                                    "stack":
                                    stack_id,
                                }, ),
                                spec=kubernetes.V1PodSpec(
                                    containers=[
                                        kubernetes.V1Container(
                                            name="datagen",
                                            image=
                                            "blackhypothesis/splunkeventgenerator:latest",
                                            resources=kubernetes.
                                            V1ResourceRequirements(
                                                requests={
                                                    "memory": "10Mi",
                                                    "cpu": "500m",
                                                },
                                                limits={
                                                    "memory": "50Mi",
                                                    "cpu": "1",
                                                },
                                            ),
                                            env=[
                                                # DSTHOST: semicolon-separated
                                                # "host:9996" receiver list.
                                                kubernetes.V1EnvVar(
                                                    name="DSTHOST",
                                                    value=";".join(
                                                        map(
                                                            lambda host: host +
                                                            ":9996",
                                                            indexer_hosts)),
                                                ),
                                                kubernetes.V1EnvVar(
                                                    name="KB_S",
                                                    value="%s" %
                                                    data_volume_in_kb_per_second_per_data_generator,
                                                ),
                                            ],
                                        ),
                                    ],
                                    node_selector=node_selector_for_generators,
                                ),
                            ),
                        ),
                    ),
                )
                logging.info("created %s data generators for case %s" %
                             (number_of_data_generators, case_id))
            # Search generators target the standalone instance or the search
            # head load balancer, depending on deployment type.
            if stack_config["deployment_type"] == "standalone":
                search_head_host = services.get_load_balancer_hosts(
                    core_api, stack_id, services.standalone_role,
                    stack_config["namespace"])[0]
            elif stack_config["deployment_type"] == "distributed":
                search_head_host = services.get_load_balancer_hosts(
                    core_api, stack_id, services.search_head_role,
                    stack_config["namespace"])[0]
            else:
                raise Exception("unexpected deployment type: %s" %
                                stack_config["deployment_type"])
            # Derive the number of search generator pods from the target
            # daily search rate, capping each generator at 5 searches/s.
            searches_per_day = int(case["searches_per_day"])
            logging.debug("searches_per_day=%s" % (searches_per_day))
            searches_per_second = searches_per_day / 24 / 60 / 60
            logging.debug("searches_per_second=%s" % (searches_per_second))
            max_searches_per_second_per_generator = 5
            logging.debug("max_searches_per_second_per_generator=%s" %
                          (max_searches_per_second_per_generator))
            number_of_search_generators = max(
                int(searches_per_second /
                    max_searches_per_second_per_generator) + 1, 1)
            logging.debug("number_of_search_generators=%s" %
                          (number_of_search_generators))
            searches_per_second_per_generator = searches_per_second / \
                number_of_search_generators
            logging.debug("searches_per_second_per_generator=%s" %
                          (searches_per_second_per_generator))
            search_template = case["search_template"]
            if searches_per_day > 0 and search_template:
                deployment_name = "searchgen-%s" % (stack_id)
                # Same idempotency check as for the datagen deployment.
                try:
                    apps_api.read_namespaced_deployment(
                        deployment_name, namespace=stack_config["namespace"])
                    search_gen_deployment_already_exists = True
                except kubernetes.rest.ApiException as e:
                    if e.status != 404:
                        raise
                    search_gen_deployment_already_exists = False
                if not search_gen_deployment_already_exists:
                    admin_password = instances.get_admin_password(
                        core_api, stack_id, stack_config,
                        services.search_head_role)
                    apps_api.create_namespaced_deployment(
                        namespace=stack_config["namespace"],
                        body=kubernetes.V1Deployment(
                            metadata=kubernetes.V1ObjectMeta(
                                name=deployment_name,
                                namespace=stack_config["namespace"],
                                labels={
                                    "app": "searchgen",
                                    "test": test_id,
                                    "case": case_id,
                                },
                            ),
                            spec=kubernetes.V1DeploymentSpec(
                                replicas=number_of_search_generators,
                                selector=kubernetes.V1LabelSelector(
                                    match_labels={
                                        "name": "searchgen-%s" % (stack_id),
                                    }),
                                template=kubernetes.V1PodTemplateSpec(
                                    metadata=kubernetes.V1ObjectMeta(labels={
                                        "name":
                                        "searchgen-%s" % (stack_id),
                                        "app":
                                        "searchgen",
                                        "test":
                                        test_id,
                                        "case":
                                        case_id,
                                        "stack":
                                        stack_id,
                                    }, ),
                                    spec=kubernetes.V1PodSpec(
                                        containers=[
                                            kubernetes.V1Container(
                                                name="searchgen",
                                                image=
                                                "hovu96/splunk-searchgen:latest",
                                                resources=kubernetes.
                                                V1ResourceRequirements(
                                                    requests={
                                                        "memory": "10Mi",
                                                        "cpu": "500m",
                                                    },
                                                    limits={
                                                        "memory": "50Mi",
                                                        "cpu": "1",
                                                    },
                                                ),
                                                env=[
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_SPL",
                                                        value=search_template,
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_HOST",
                                                        value=search_head_host,
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_USER",
                                                        value="admin",
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name=
                                                        "SEARCH_GEN_PASSWORD",
                                                        value=admin_password,
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_SPS",
                                                        value="%s" %
                                                        searches_per_second_per_generator,
                                                    ),
                                                ],
                                            ),
                                        ],
                                        node_selector=
                                        node_selector_for_generators,
                                    ),
                                ),
                            ),
                        ),
                    )
                    logging.info("created %s search generators for case %s" %
                                 (number_of_search_generators, case_id))
            else:
                logging.info("no search generators started")
            case.update({
                "status": CASE_RUNNING,
                "time_started_running": time.time(),
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation("running test case %s ..." % case_id)
        elif status == CASE_RUNNING:
            # RUNNING: retry until the configured run duration (minutes) has
            # elapsed, then move the case to STOPPING.
            time_started_running = case["time_started_running"]
            time_now = time.time()
            seconds_running_to_far = time_now - time_started_running
            target_run_duration = test["run_duration"]
            logging.debug(
                "time_started_running=%s time_now=%s seconds_running_to_far=%s"
                % (time_started_running, time_now, seconds_running_to_far))
            if seconds_running_to_far < (target_run_duration * 60):
                logging.debug("still waiting")
                raise errors.RetryOperation()
            logging.info("time elapsed for case %s" % (case_id))
            case.update({
                "status": CASE_STOPPING,
                "time_finished_running": time.time(),
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation("stopping test case %s" % case_id)
        elif status == CASE_STOPPING:
            # STOPPING: tear down the case's stack and generators, then mark
            # it FINISHED.
            stop_case(splunk, test_id, case_id, case)
            case.update({
                "status": CASE_FINISHED,
            })
            cases_collection.update(case_id, json.dumps(case))
            logging.info("finished test case %s" % case_id)
        else:
            logging.error("run_cases: unexpected status for test case %s: %s" %
                          (case_id, status))
            raise errors.RetryOperation()