def install_as_local_app(splunk, kubernetes, stack_id, pod, app):
    core_api = kuberneteslib.CoreV1Api(kubernetes)
    stack_config = stacks.get_stack_config(splunk, stack_id)
    cluster_config = clusters.get_cluster(splunk, stack_config["cluster"])
    pod_local_path = "/tmp/%s.tar" % (app.name)
    # Render the app into a temporary directory and upload it as a tarball
    # into the target pod.
    temp_dir = tempfile.mkdtemp()
    try:
        app.render(cluster_config, stack_config, temp_dir)
        render_saas_app_data(temp_dir, app)
        tar_directory_to_pod(
            core_api=core_api,
            pod=pod.metadata.name,
            namespace=stack_config["namespace"],
            local_path=temp_dir,
            remote_path=pod_local_path,
        )
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
    # Install the uploaded tarball via the instance's Splunk REST API.
    service = instances.create_client(
        core_api, stack_id, stack_config,
        pod.metadata.labels["app.kubernetes.io/name"])
    try:
        service.post(
            "apps/local",
            filename=True,
            name=pod_local_path,
            update=True,
            explicit_appname=app.name,
        )
    except splunklib.binding.HTTPError:
        raise
    logging.info("installed app '%s' to '%s'" %
                 (app.name, pod.metadata.name))
    return service.restart_required
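# Hypothetical usage sketch (not part of the original module): install a list
# of apps on one pod and report whether any installation flagged that a
# restart is required. The helper name and calling context are assumptions.
def install_apps_as_local_apps(splunk, kubernetes, stack_id, pod, apps):
    restart_required = False
    for app in apps:
        # install_as_local_app returns the splunklib restart_required flag
        if install_as_local_app(splunk, kubernetes, stack_id, pod, app):
            restart_required = True
    return restart_required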
def update_deployment(splunk, kubernetes, stack_id):
    core_api = kuberneteslib.CoreV1Api(kubernetes)
    stack_config = stacks.get_stack_config(splunk, stack_id)
    cluster_name = stack_config["cluster"]
    kubernetes = clusters.create_client(splunk, cluster_name)
    cluster_config = clusters.get_cluster(splunk, cluster_name)
    if stack_config["deployment_type"] == "distributed":
        indexer_cluster.update(splunk, kubernetes, stack_id, stack_config)
        search_head_cluster.update(splunk, kubernetes, stack_id, stack_config)
        indexer_cluster.wait_until_ready(
            splunk, kubernetes, stack_id, stack_config)
        search_head_cluster.wait_until_ready(
            splunk, kubernetes, stack_id, stack_config)
    create_load_balancers(core_api, stack_id, stack_config)
    verify_load_balancers_completed(core_api, stack_id, stack_config)
def down(splunk, stack_id, force=False):
    stacks.update_config(splunk, stack_id, {
        "status": stacks.DELETING,
        "deleted_time": time.time(),
    })
    stack_config = stacks.get_stack_config(splunk, stack_id)
    cluster_name = stack_config["cluster"]
    cluster_config = clusters.get_cluster(splunk, cluster_name)
    api_client = clusters.create_client(splunk, cluster_name)
    core_api = kuberneteslib.CoreV1Api(api_client)
    custom_objects_api = kuberneteslib.CustomObjectsApi(api_client)
    try:
        services.delete_all_load_balancers(
            core_api, stack_id, stack_config["namespace"])
        stack_deployment.delete_objects(
            api_client, stack_id, stack_config, cluster_config)
    except Exception:
        # When force-deleting, ignore teardown errors and mark the stack
        # as deleted anyway.
        if not force:
            raise
    stacks.update_config(splunk, stack_id, {
        "status": stacks.DELETED,
    })
def copy_app_into_folder(splunk, kubernetes, stack_id, pod, app,
                         target_parent_name):
    core_api = kuberneteslib.CoreV1Api(kubernetes)
    stack_config = stacks.get_stack_config(splunk, stack_id)
    cluster_config = clusters.get_cluster(splunk, stack_config["cluster"])
    # Render the app into a temporary directory and copy it into the target
    # folder below /opt/splunk/etc on the pod.
    temp_dir = tempfile.mkdtemp()
    try:
        app.render(cluster_config, stack_config, temp_dir)
        render_saas_app_data(temp_dir, app)
        copy_directory_to_pod(
            core_api=core_api,
            pod=pod.metadata.name,
            namespace=stack_config["namespace"],
            local_path=temp_dir,
            remote_path="/opt/splunk/etc/%s/%s/" % (
                target_parent_name, app.name),
        )
        logging.info("copied app '%s' at '%s' to '%s'" % (
            app.name, target_parent_name, pod.metadata.name))
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
def up(splunk, stack_id):
    stack_config = stacks.get_stack_config(splunk, stack_id)
    cluster_name = stack_config["cluster"]
    kubernetes = clusters.create_client(splunk, cluster_name)
    cluster_config = clusters.get_cluster(splunk, cluster_name)
    status = stack_config["status"]
    if status == stacks.CREATING:
        stack_deployment.create_deployment(
            splunk, kubernetes, stack_id, stack_config, cluster_config)
        app_deployment.update_apps(splunk, kubernetes, stack_id)
        logging.info("created")
        stacks.update_config(splunk, stack_id, {
            "status": stacks.CREATED,
        })
    elif status == stacks.UPDATING:
        stack_deployment.update_deployment(splunk, kubernetes, stack_id)
        app_deployment.update_apps(splunk, kubernetes, stack_id)
        logging.info("updated")
        stacks.update_config(splunk, stack_id, {
            "status": stacks.CREATED,
        })
    elif status == stacks.CREATED:
        logging.info("Everything is up-to-date")
    else:
        logging.warning("unexpected status: %s", status)
def handle_POST(self):
    stacks = get_stacks(self.splunk)
    defaults = self.splunk.confs["defaults"]["general"]
    # create stack record
    stack_record = {
        "status": CREATING,
    }
    fields_names = set([
        "deployment_type",
        "license_master_mode",
        "enterprise_license",
        "indexer_count",
        "search_head_count",
        "cpu_per_instance",
        "memory_per_instance",
        "title",
        "data_fabric_search",
        "spark_worker_count",
        "cluster",
        "namespace",
        "etc_storage_in_gb",
        "other_var_storage_in_gb",
        "indexer_var_storage_in_gb",
    ])
    # apply request parameters
    request_params = parse_qs(self.request['payload'])
    stack_record.update({
        k: request_params[k][0]
        for k in fields_names
        if k in request_params
    })
    # apply missing fields from defaults
    stack_record.update({
        k: defaults[k]
        for k in fields_names
        if k in defaults and k not in stack_record
    })
    # apply missing fields from cluster config
    cluster_name = stack_record["cluster"]
    cluster_config = clusters.get_cluster(self.service, cluster_name)
    stack_record.update({
        k: cluster_config[k]
        for k in fields_names
        if k in cluster_config and k not in stack_record
    })
    # add missing fields
    if "data_fabric_search" not in stack_record:
        stack_record["data_fabric_search"] = "false"
    if "spark_worker_count" not in stack_record:
        stack_record["spark_worker_count"] = "0"
    if "cpu_per_instance" not in stack_record:
        stack_record["cpu_per_instance"] = "1"
    if "memory_per_instance" not in stack_record:
        stack_record["memory_per_instance"] = "4Gi"
    # save stack
    stack_id = stacks.insert(json.dumps(stack_record))["_key"]
    # start operator
    stack_operation.start(self.service, stack_id)
    # return ID
    self.send_result({
        "stack_id": stack_id,
    })
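# Illustrative example only (stanza values are assumptions, not taken from the
# code): the [general] stanza read above via
# self.splunk.confs["defaults"]["general"] could provide fallbacks such as:
#
#   [general]
#   deployment_type = standalone
#   indexer_count = 1
#   search_head_count = 1
#   cpu_per_instance = 1
#   memory_per_instance = 4Gi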
def run_cases(splunk, test_id, test):
    cases_collection = get_performance_test_cases_collection(splunk)
    cases = cases_collection.query(
        query=json.dumps({
            "test_id": test_id,
        }),
        sort="index:1",
    )
    for case in cases:
        case_id = case["_key"]
        status = case["status"]
        if status == CASE_FINISHED:
            continue
        if status == CASE_WAITING:
            # create a stack for this test case
            result = splunk.post(
                "saas/stacks", **{
                    "deployment_type": case["deployment_type"],
                    "indexer_count": case["indexer_count"],
                    "search_head_count": case["search_head_count"],
                    "cpu_per_instance": case["cpu_per_instance"],
                    "etc_storage_in_gb": case["etc_storage_in_gb"],
                    "other_var_storage_in_gb": case["other_var_storage_in_gb"],
                    "indexer_var_storage_in_gb": case["indexer_var_storage_in_gb"],
                    "memory_per_instance": case["memory_per_instance"],
                    "title": "Performance Test %s and Case %s" % (test_id, case_id),
                    "cluster": test["cluster"],
                })
            response = json.loads(result.body.read())["entry"][0]["content"]
            stack_id = response["stack_id"]
            logging.info("created stack %s for test case %s" % (stack_id, case_id))
            case.update({
                "status": CASE_STARTING,
                "stack_id": stack_id,
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation(
                "waiting for stack %s in test case %s starting up ..." %
                (stack_id, case_id))
        elif status == CASE_STARTING:
            stack_id = case["stack_id"]
            stack = splunk.get("saas/stack/%s" % stack_id)
            stack_status = json.loads(
                stack.body.read())["entry"][0]["content"]["status"]
            if stack_status == stacks.CREATING:
                raise errors.RetryOperation()
            if stack_status != stacks.CREATED:
                raise Exception("unexpected stack status: %s" % stack_status)
            logging.info("successfully created stack %s for case %s" %
                         (stack_id, case_id))
            stack_config = stacks.get_stack_config(splunk, stack_id)
            kube_client = clusters.create_client(splunk, stack_config["cluster"])
            cluster_config = clusters.get_cluster(splunk, test["cluster"])
            # build the node selector for the load generator pods from the
            # comma-separated key=value list in the cluster config
            node_selector_labels = cluster_config["node_selector"].split(",")
            node_selector_for_generators = {}
            for label in node_selector_labels:
                if label:
                    kv = label.split("=")
                    if len(kv) != 2:
                        raise errors.ApplicationError(
                            "invalid node selector format (%s)" %
                            cluster_config["node_selector"])
                    node_selector_for_generators[kv[0]] = kv[1]
            apps_api = kubernetes.AppsV1Api(kube_client)
            core_api = kubernetes.CoreV1Api(kube_client)
            if stack_config["deployment_type"] == "standalone":
                indexer_hosts = services.get_load_balancer_hosts(
                    core_api, stack_id, services.standalone_role,
                    stack_config["namespace"])
            elif stack_config["deployment_type"] == "distributed":
                indexer_hosts = services.get_load_balancer_hosts(
                    core_api, stack_id, services.indexer_role,
                    stack_config["namespace"])
            else:
                raise Exception("unexpected deployment type: %s" %
                                stack_config["deployment_type"])
            # derive the number of data generators from the target daily data
            # volume, capped at 100 KB/s per generator
            data_volume_in_gb_per_day = int(case["data_volume_in_gb_per_day"])
            logging.debug("data_volume_in_gb_per_day=%s" %
                          (data_volume_in_gb_per_day))
            data_volume_in_gb_per_second = data_volume_in_gb_per_day / 24 / 60 / 60
            logging.debug("data_volume_in_gb_per_second=%s" %
                          (data_volume_in_gb_per_second))
            data_volume_in_kb_per_second = data_volume_in_gb_per_second * 1024 * 1024
            logging.debug("data_volume_in_kb_per_second=%s" %
                          (data_volume_in_kb_per_second))
            max_kb_per_second_per_data_generator = 100
            logging.debug("max_kb_per_second_per_data_generator=%s" %
                          (max_kb_per_second_per_data_generator))
            number_of_data_generators = max(
                int(data_volume_in_kb_per_second /
                    max_kb_per_second_per_data_generator) + 1, 1)
            logging.debug("number_of_data_generators=%s" %
                          (number_of_data_generators))
            data_volume_in_kb_per_second_per_data_generator = \
                data_volume_in_kb_per_second / number_of_data_generators
            logging.debug(
                "data_volume_in_kb_per_second_per_data_generator=%s" %
                (data_volume_in_kb_per_second_per_data_generator))
            # create the data generator deployment if it does not exist yet
            deployment_name = "datagen-%s" % (stack_id)
            try:
                apps_api.read_namespaced_deployment(
                    deployment_name, namespace=stack_config["namespace"])
                data_gen_deployment_already_exists = True
            except kubernetes.rest.ApiException as e:
                if e.status != 404:
                    raise
                data_gen_deployment_already_exists = False
            if not data_gen_deployment_already_exists:
                apps_api.create_namespaced_deployment(
                    namespace=stack_config["namespace"],
                    body=kubernetes.V1Deployment(
                        metadata=kubernetes.V1ObjectMeta(
                            name=deployment_name,
                            namespace=stack_config["namespace"],
                            labels={
                                "app": "datagen",
                                "test": test_id,
                                "case": case_id,
                            },
                        ),
                        spec=kubernetes.V1DeploymentSpec(
                            replicas=number_of_data_generators,
                            selector=kubernetes.V1LabelSelector(
                                match_labels={
                                    "name": "datagen-%s" % (stack_id),
                                }),
                            template=kubernetes.V1PodTemplateSpec(
                                metadata=kubernetes.V1ObjectMeta(
                                    labels={
                                        "name": "datagen-%s" % (stack_id),
                                        "app": "datagen",
                                        "test": test_id,
                                        "case": case_id,
                                        "stack": stack_id,
                                    },
                                ),
                                spec=kubernetes.V1PodSpec(
                                    containers=[
                                        kubernetes.V1Container(
                                            name="datagen",
                                            image="blackhypothesis/splunkeventgenerator:latest",
                                            resources=kubernetes.V1ResourceRequirements(
                                                requests={
                                                    "memory": "10Mi",
                                                    "cpu": "500m",
                                                },
                                                limits={
                                                    "memory": "50Mi",
                                                    "cpu": "1",
                                                },
                                            ),
                                            env=[
                                                kubernetes.V1EnvVar(
                                                    name="DSTHOST",
                                                    value=";".join(
                                                        map(lambda host: host + ":9996",
                                                            indexer_hosts)),
                                                ),
                                                kubernetes.V1EnvVar(
                                                    name="KB_S",
                                                    value="%s" %
                                                    data_volume_in_kb_per_second_per_data_generator,
                                                ),
                                            ],
                                        ),
                                    ],
                                    node_selector=node_selector_for_generators,
                                ),
                            ),
                        ),
                    ),
                )
                logging.info("created %s data generators for case %s" %
                             (number_of_data_generators, case_id))
            if stack_config["deployment_type"] == "standalone":
                search_head_host = services.get_load_balancer_hosts(
                    core_api, stack_id, services.standalone_role,
                    stack_config["namespace"])[0]
            elif stack_config["deployment_type"] == "distributed":
                search_head_host = services.get_load_balancer_hosts(
                    core_api, stack_id, services.search_head_role,
                    stack_config["namespace"])[0]
            else:
                raise Exception("unexpected deployment type: %s" %
                                stack_config["deployment_type"])
            # derive the number of search generators from the target search
            # rate, capped at 5 searches/s per generator
            searches_per_day = int(case["searches_per_day"])
            logging.debug("searches_per_day=%s" % (searches_per_day))
            searches_per_second = searches_per_day / 24 / 60 / 60
            logging.debug("searches_per_second=%s" % (searches_per_second))
            max_searches_per_second_per_generator = 5
            logging.debug("max_searches_per_second_per_generator=%s" %
                          (max_searches_per_second_per_generator))
            number_of_search_generators = max(
                int(searches_per_second /
                    max_searches_per_second_per_generator) + 1, 1)
            logging.debug("number_of_search_generators=%s" %
                          (number_of_search_generators))
            searches_per_second_per_generator = \
                searches_per_second / number_of_search_generators
            logging.debug("searches_per_second_per_generator=%s" %
                          (searches_per_second_per_generator))
            search_template = case["search_template"]
            if searches_per_day > 0 and search_template:
                # create the search generator deployment if it does not exist yet
                deployment_name = "searchgen-%s" % (stack_id)
                try:
                    apps_api.read_namespaced_deployment(
                        deployment_name, namespace=stack_config["namespace"])
                    search_gen_deployment_already_exists = True
                except kubernetes.rest.ApiException as e:
                    if e.status != 404:
                        raise
                    search_gen_deployment_already_exists = False
                if not search_gen_deployment_already_exists:
                    admin_password = instances.get_admin_password(
                        core_api, stack_id, stack_config,
                        services.search_head_role)
                    apps_api.create_namespaced_deployment(
                        namespace=stack_config["namespace"],
                        body=kubernetes.V1Deployment(
                            metadata=kubernetes.V1ObjectMeta(
                                name=deployment_name,
                                namespace=stack_config["namespace"],
                                labels={
                                    "app": "searchgen",
                                    "test": test_id,
                                    "case": case_id,
                                },
                            ),
                            spec=kubernetes.V1DeploymentSpec(
                                replicas=number_of_search_generators,
                                selector=kubernetes.V1LabelSelector(
                                    match_labels={
                                        "name": "searchgen-%s" % (stack_id),
                                    }),
                                template=kubernetes.V1PodTemplateSpec(
                                    metadata=kubernetes.V1ObjectMeta(
                                        labels={
                                            "name": "searchgen-%s" % (stack_id),
                                            "app": "searchgen",
                                            "test": test_id,
                                            "case": case_id,
                                            "stack": stack_id,
                                        },
                                    ),
                                    spec=kubernetes.V1PodSpec(
                                        containers=[
                                            kubernetes.V1Container(
                                                name="searchgen",
                                                image="hovu96/splunk-searchgen:latest",
                                                resources=kubernetes.V1ResourceRequirements(
                                                    requests={
                                                        "memory": "10Mi",
                                                        "cpu": "500m",
                                                    },
                                                    limits={
                                                        "memory": "50Mi",
                                                        "cpu": "1",
                                                    },
                                                ),
                                                env=[
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_SPL",
                                                        value=search_template,
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_HOST",
                                                        value=search_head_host,
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_USER",
                                                        value="admin",
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_PASSWORD",
                                                        value=admin_password,
                                                    ),
                                                    kubernetes.V1EnvVar(
                                                        name="SEARCH_GEN_SPS",
                                                        value="%s" %
                                                        searches_per_second_per_generator,
                                                    ),
                                                ],
                                            ),
                                        ],
                                        node_selector=node_selector_for_generators,
                                    ),
                                ),
                            ),
                        ),
                    )
                    logging.info("created %s search generators for case %s" %
                                 (number_of_search_generators, case_id))
            else:
                logging.info("no search generators started")
            case.update({
                "status": CASE_RUNNING,
                "time_started_running": time.time(),
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation("running test case %s ..." % case_id)
        elif status == CASE_RUNNING:
            time_started_running = case["time_started_running"]
            time_now = time.time()
            seconds_running_so_far = time_now - time_started_running
            target_run_duration = test["run_duration"]
            logging.debug(
                "time_started_running=%s time_now=%s seconds_running_so_far=%s" %
                (time_started_running, time_now, seconds_running_so_far))
            if seconds_running_so_far < (target_run_duration * 60):
                logging.debug("still waiting")
                raise errors.RetryOperation()
            logging.info("time elapsed for case %s" % (case_id))
            case.update({
                "status": CASE_STOPPING,
                "time_finished_running": time.time(),
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation("stopping test case %s" % case_id)
        elif status == CASE_STOPPING:
            stop_case(splunk, test_id, case_id, case)
            case.update({
                "status": CASE_FINISHED,
            })
            cases_collection.update(case_id, json.dumps(case))
            logging.info("finished test case %s" % case_id)
        else:
            logging.error("run_cases: unexpected status for test case %s: %s" %
                          (case_id, status))
            raise errors.RetryOperation()
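# Worked example of the generator sizing in run_cases (numbers are
# illustrative, not taken from any real test case):
# with data_volume_in_gb_per_day = 100,
#   100 GB/day / 86400 s ~= 0.001157 GB/s ~= 1213.6 KB/s,
# so at max_kb_per_second_per_data_generator = 100 the case gets
#   int(1213.6 / 100) + 1 = 13 data generators at ~93.4 KB/s each.
# Likewise, searches_per_day = 100000 gives ~1.157 searches/s, which at
# max_searches_per_second_per_generator = 5 results in a single search
# generator running ~1.157 searches/s.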