def kind_available():
    """Return `True` if we have an integration test cluster available."""
    if not sh:
        # We probably run on Windows - no integration test cluster.
        return False

    # Query the version of the integration test cluster. If that works we have
    # a cluster that the tests can use, otherwise not.
    try:
        sh.kubectl("--kubeconfig", "/tmp/kubeconfig-kind.yaml", "version")
    except (ImportError, sh.CommandNotFound, sh.ErrorReturnCode_1):
        return False
    return True
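# A minimal sketch (an assumption, since the import site is not shown in this
# snippet) of the guarded import that `kind_available` relies on: `sh` does
# not support Windows, so the import is wrapped and the name bound to a falsy
# placeholder.
try:
    import sh
except ImportError:
    sh = None  # e.g. on Windows, where the `sh` package is unavailable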
def install(self):
    command = [
        'create', 'secret', 'generic', self.secret_name,
        '--dry-run', '-o', 'yaml'
    ]
    literal_secrets = list(self.manipulator.get_literal_secrets())
    file_secrets = list(self.manipulator.get_file_secrets())
    if literal_secrets or file_secrets:
        for key, value in literal_secrets:
            command.append('--from-literal={}={}'.format(key, value))
        for subpath in file_secrets:
            command.append('--from-file={}'.format(subpath))
    # `sh` pipes the output of the inner command (the generated manifest)
    # into the outer `kubectl apply -f -`.
    sh.kubectl(sh.kubectl(*command), 'apply', '--record', '-f', '-')
def _pod_check(self):
    # Count the matching pods. The line count includes kubectl's header
    # row, which matches how `self.num` was computed in `_check_deployment`.
    _pod_number = sh.wc(
        sh.kubectl('get', 'pod', '-l', 'commitHash=%s' % self.git_hash,
                   '-n', self.name_space), '-l')
    if int(_pod_number.replace('\n', "")) != self.num:
        return False
    return True
def main():
    # Load config from default location.
    config.load_kube_config()
    v1 = client.CoreV1Api()
    fluentd_pod_name = None

    # find by name
    print("Find fluentd pod by name '{}'".format(FLUENTD_PATTERN))
    fluentd_regex = re.compile(FLUENTD_PATTERN)
    resp = v1.list_namespaced_pod(FLUENTD_NAMESPACE)
    for i in resp.items:
        if fluentd_regex.search(i.metadata.name) is not None:
            print(i.metadata.name)

    # find by label selector
    print("Find fluentd pod by label selector '{}'".format(FLUENTD_LABELS))
    resp = v1.list_namespaced_pod(FLUENTD_NAMESPACE,
                                  label_selector=FLUENTD_LABELS)
    for i in resp.items:
        print(i.metadata.name)
        fluentd_pod_name = i.metadata.name

    # check fluentd configuration
    # NOTE: exec in the Python library does not work well, use a shell
    # command as a workaround.
    # See https://github.com/kubernetes-client/python/issues/485
    result = sh.kubectl(
        ('exec -n logging ' + fluentd_pod_name +
         ' cat /etc/fluent/config.d/forward.input.conf').split())
    if FLUENTD_INPUT in result:
        print("fluentd input configured correctly")
    else:
        print("fluentd input not configured\n{}".format(FLUENTD_INPUT))
def setup(self):
    with contextlib.suppress(sh.ErrorReturnCode):
        sh.kubectl('delete', 'configmap', 'global')
    sh.kubectl(
        'create', 'configmap', 'global',
        '--from-literal', 'monolith-host={}'.format(self.monolith_host),
        '--from-literal', 'base-domain={}'.format(self.base_domain),
        '--from-literal', 'alternative-domain={}'.format(self.alternative_domain),
        '--from-literal', 'debug={}'.format(self.debug),
    )
def _check_deployment(self, _deploy_name):
    # The line count includes kubectl's header row, so fewer than two
    # lines means no matching pods were found.
    _num = sh.wc(
        sh.kubectl('get', 'pod', '-l', 'app=%s' % _deploy_name,
                   '-n', self.name_space), '-l')
    self.num = int(_num.replace('\n', ""))
    if self.num < 2:
        log_print("%s pod does not exist." % _deploy_name)
        os._exit(4)
def test_nonpreferred_api(self, tmp_path):
    """Sync `autoscaling/v1` and `autoscaling/v2beta2` at the same time.

    This test is designed to verify that Square will interrogate the
    correct K8s endpoint versions to download the manifests.
    """
    # Only show INFO and above or otherwise this test will produce a
    # humongous amount of useless logs from all the K8s calls.
    square.square.setup_logging(2)

    config = Config(
        folder=tmp_path,
        groupby=GroupBy(label="app", order=[]),
        kubecontext=None,
        kubeconfig=Filepath("/tmp/kubeconfig-kind.yaml"),
        selectors=Selectors(
            kinds={"Namespace", "HorizontalPodAutoscaler"},
            namespaces=["test-hpa"],
            labels=[],
        ),
    )

    # Copy the manifest with the namespace and the two HPAs to the temporary path.
    manifests = list(
        yaml.safe_load_all(open("tests/support/k8s-test-hpa.yaml")))
    man_path = tmp_path / "manifest.yaml"
    man_path.write_text(yaml.dump_all(manifests))
    assert len(manifests) == 3

    # ---------------------------------------------------------------------
    # Deploy the resources: one namespace with two HPAs in it. One will be
    # deployed via `autoscaling/v1`, the other via `autoscaling/v2beta2`.
    # ---------------------------------------------------------------------
    sh.kubectl("apply", "--kubeconfig", config.kubeconfig, "-f", str(man_path))

    # ---------------------------------------------------------------------
    # Sync all manifests. This must do nothing. In particular, it must not
    # change the `apiVersion` of either HPA.
    # ---------------------------------------------------------------------
    assert not square.square.get_resources(config)
    assert list(yaml.safe_load_all(man_path.read_text())) == manifests
def set_k8s_job(job):
    job_name = uuid.uuid4().hex
    y = """apiVersion: batch/v1
kind: Job
metadata:
  name: %s
spec:
  template:
    spec:
      containers:
      - name: pygoonk
        image: druuu/pygoonk:0.5
        command: ['/usr/bin/python3', 'manage.py', 'run', '%s']
      restartPolicy: Never""" % (job_name, job.job_id)
    jf = "/tmp/" + job_name
    with open(jf, 'w') as f:
        f.write(y)
    sh.kubectl("create", "-f", jf, "-n", settings.K8S_NAMESPACE)
    os.remove(jf)
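# A hedged variation on `set_k8s_job` (a sketch, not the original code): build
# the Job manifest as a dict and pipe it to `kubectl create -f -` via stdin,
# which avoids manual string interpolation and the temporary file. Assumes
# `import yaml` in addition to the imports used above.
def set_k8s_job_via_stdin(job):
    job_name = uuid.uuid4().hex
    manifest = {
        'apiVersion': 'batch/v1',
        'kind': 'Job',
        'metadata': {'name': job_name},
        'spec': {
            'template': {
                'spec': {
                    'containers': [{
                        'name': 'pygoonk',
                        'image': 'druuu/pygoonk:0.5',
                        'command': ['/usr/bin/python3', 'manage.py',
                                    'run', str(job.job_id)],
                    }],
                    'restartPolicy': 'Never',
                }
            }
        },
    }
    # `_in=` feeds the rendered YAML to kubectl's stdin.
    sh.kubectl('create', '-f', '-', '-n', settings.K8S_NAMESPACE,
               _in=yaml.dump(manifest))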
def get_all_deployments_for_namespace_as_yaml(namespace):
    """Return a list of deployments in yaml form for a given namespace."""
    print("Retrieving all deployments for the namespace {}".format(namespace))
    try:
        deployments_as_yaml = yaml.safe_load(
            kubectl("get", "deployments", "--namespace", namespace,
                    "--output", "yaml").stdout)['items']
        return deployments_as_yaml
    except Exception:
        print("Error happened while retrieving deployments")
        print(sys.exc_info())
        return {}
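# Usage sketch for the helper above: it returns `{}` on error, so iterating
# the result is safe either way. `'default'` is an illustrative namespace.
for deployment in get_all_deployments_for_namespace_as_yaml('default'):
    print(deployment['metadata']['name'])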
def manage_minikube_svc(action):
    if 'minikube' in list(pconfig.keys()):
        if 'loadsvc' in list(pconfig['minikube'].keys()):
            print("%s services in minikube..." % (action))
            try:
                output = sh.kubectl('config', 'use-context', 'minikube')
                # print(output)
                # `sh` resolves `minikube_services` to a `minikube-services`
                # executable on the PATH.
                output = sh.minikube_services(action)
                print(output)
            except sh.ErrorReturnCode as e:
                print("ERROR: minikube-services unable to %s services" % (action))
                print(e.stderr)
def update_kube_config(cluster_name, buttdir, master_string):
    """Add a new context to the kube config."""
    user_dir = os.path.expanduser("~")
    update = input("Add {} cluster to kube config? (y|n) ".format(cluster_name))
    if update == 'y':
        if os.path.isfile("{}/.kube/config".format(user_dir)):
            shutil.copy("{}/.kube/config".format(user_dir),
                        "{}/.kube/config.bak".format(user_dir))
        kubectl("config", "set-cluster", "{}-cluster".format(cluster_name),
                "--server={}".format(master_string),
                "--certificate-authority={}/ssl/ca.pem".format(buttdir))
        kubectl("config", "set-credentials", "{}-admin".format(cluster_name),
                "--certificate-authority={}/ssl/ca.pem".format(buttdir),
                "--client-key={}/ssl/admin-key.pem".format(buttdir),
                "--client-certificate={}/ssl/admin.pem".format(buttdir))
        kubectl("config", "set-context", "{}-system".format(cluster_name),
                "--cluster={}-cluster".format(cluster_name),
                "--user={}-admin".format(cluster_name))
        kubectl("config", "use-context", "{}-system".format(cluster_name))
def is_prowjob_finished(build_id):
    if not build_id:
        logger.warning("The resource group doesn't have the build id tag.")
        return False
    output = sh.kubectl('get', 'prowjob', '-n', 'default', '-o', 'json',
                        '-l', 'prow.k8s.io/build-id={}'.format(build_id))
    prowjob = json.loads(output.stdout)
    if len(prowjob['items']) == 0:
        logger.info("The prowjob doesn't exist anymore.")
        return True
    state = prowjob['items'][0]['status'].get('state')
    if state != 'pending':
        logger.info("The prowjob is not running anymore.")
        return True
    return False
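# For reference, the (abbreviated, illustrative) shape of the JSON that
# `is_prowjob_finished` inspects; an empty `items` list means the prowjob
# has been garbage-collected:
#
#   {"items": [{"status": {"state": "pending"}}]}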
def is_key_present(self, key):
    try:
        yml_output = str(
            sh.kubectl(
                'get', 'secrets', self.secret_name,
                '--output', 'yaml',
            ))
    except sh.ErrorReturnCode:
        return False
    else:
        secret = yaml.safe_load(yml_output)
        return key in secret['data']
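# Note on `is_key_present`: the membership test runs against `secret['data']`,
# i.e. it checks for the key's *name* among the base64-encoded entries, not
# its decoded value. Decoding an entry would look roughly like this (sketch):
#
#   base64.b64decode(secret['data'][key]).decode('utf8')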
def deployment_update(self):
    if self.env == "prod":
        log_print("The prod environment is updated manually for now, "
                  "image: %s." % self.images_tag)
        return True
    log_print("Starting kubectl apply deployment")
    _deploy_file = '%s/../kubctl_template/springBoot/Deploy_template' % BASE_PATH
    sh.cd(self.docker_build_dir)
    _deploy_env = self._deployment_var()
    self._check_deployment(_deploy_env["DEPLOY_NAME"])
    sh.cp(_deploy_file, 'Deploy_template')
    _dep_file = sh.envsubst(sh.cat('Deploy_template'), _env=_deploy_env)
    _yaml_name = "%s.yaml" % _deploy_env["DEPLOY_NAME"]
    with open(_yaml_name, "w") as fo:
        for d_file in _dep_file:
            fo.write(d_file)
    if not os.path.exists(_yaml_name):
        log_print("Failed to create the yaml file.")
        os._exit(4)
    log_print("yaml file created successfully, starting kubectl apply update...")
    sh.kubectl('apply', '-f', _yaml_name, _out=process_output)
def get_pods(namespace, deployment_name, deployment_labels):
    """Get a list of pods for a given deployment label set."""
    print("getting pods for deployment {}".format(deployment_name),
          file=sys.stderr)
    selectors = []
    for deployment_label in deployment_labels:
        selectors.append('{}={}'.format(
            deployment_label, deployment_labels[deployment_label]))
    label_string = ','.join(selectors)
    pods_as_yaml = {}
    try:
        pods_as_yaml = yaml.safe_load(
            kubectl("get", "pods", "--namespace", namespace, "-l",
                    label_string, "-o", "yaml").stdout)['items']
    except Exception:
        print("Couldn't retrieve pods for the deployment {}".format(
            deployment_name))
    return pods_as_yaml
def delete_pod(namespace, pods, deployment_name, sleep_timer):
    """
    Delete a list of given pods.

    Follows a sleep timer to insert a sleep time between each deletion.
    """
    status_file = '/tmp/podrotaterstatus-{}'.format(deployment_name)
    for pod in pods:
        try:
            # print("kubectl delete pod --namespace {} {}".format(namespace, pod))
            kube_delete_op_result = kubectl('delete', 'pod', '--namespace',
                                            namespace, pod)
            print(kube_delete_op_result.stdout.decode('utf-8'))
            with open(status_file, 'r') as f:
                rotation_information = json.loads(f.read())
            if pod in rotation_information['unrotated_pod_list']:
                rotation_information['unrotated_pod_list'].remove(pod)
            with open(status_file, 'w') as f:
                f.write(json.dumps(rotation_information))
            time.sleep(sleep_timer)
        except Exception:
            print("Failed to delete {}".format(pod))
def delete_pods_for_given_deployment(args_to_parse):
    """Entry function for the module."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--namespace', dest='namespace', action='store',
        help='Namespace to work in (use all to signal all namespaces)',
        default="default")
    parser.add_argument(
        '--deployments', dest='deployments', action='store', nargs='+',
        help='Use all to signal all deployments for the namespace')
    parser.add_argument(
        '--sleep', dest='sleep_timer', action='store', type=int, default=5,
        help='Time in seconds to keep between deleting each pod. Defaults to 5')
    parser.add_argument(
        '--threaded', dest='threaded', action='store_true',
        help='Experimental: Leverages threads to delete pods across '
             'deployments in parallel')
    parser.add_argument('--restart', dest='restart', action='store_true')
    args = parser.parse_args(args_to_parse)

    if args.restart:
        restart_deletion(args.threaded, args.sleep_timer)
    else:
        if not args.deployments:
            print("error: the following arguments are required: --deployments")
            sys.exit(-1)
        with open('/tmp/podrotaterlast', 'w') as f:
            current_command_info = {
                "namespace": args.namespace,
                "deployments": args.deployments
            }
            f.write(json.dumps(current_command_info))

        if args.deployments != ["all"]:
            print("Attempting to grab deployment details")
            try:
                deployments_as_yaml = yaml.safe_load(
                    kubectl("get", "deployments", "--namespace",
                            args.namespace, args.deployments, "-o",
                            "yaml").stdout)
                if 'items' not in deployments_as_yaml:
                    deployments = [deployments_as_yaml]
                else:
                    deployments = deployments_as_yaml['items']
            except Exception:
                print("Could not retrieve the requested deployments")
                sys.exit(1)
        else:
            try:
                deployments_as_yaml = yaml.safe_load(
                    kubectl("get", "deployments", "--namespace",
                            args.namespace, "-o", "yaml").stdout)
                deployments = deployments_as_yaml['items']
            except Exception:
                print("Could not retrieve the deployments for the namespace")
                sys.exit(1)

        print("Downloaded deployment information")
        deployments_with_labels = get_deployment_labels(deployments)
        pods_to_delete = {}
        for deployment_name in deployments_with_labels:
            pods = get_pods(args.namespace, deployment_name,
                            deployments_with_labels[deployment_name])
            pods_to_delete[deployment_name] = [
                pod['metadata']['name'] for pod in pods
            ]
            with open('/tmp/podrotaterstatus-{}'.format(deployment_name),
                      'w') as f:
                info_to_write = {
                    "deployment_name": deployment_name,
                    "namespace": args.namespace,
                    "unrotated_pod_list": pods_to_delete[deployment_name]
                }
                f.write(json.dumps(info_to_write))

        if not args.threaded:
            for deployment_name in pods_to_delete:
                pod_list = pods_to_delete[deployment_name]
                delete_pod(args.namespace, pod_list, deployment_name,
                           args.sleep_timer)
        else:
            delete_pods_in_threads(pods_to_delete, args.namespace,
                                   args.sleep_timer)
def start_port_forward(target, *ports):
    process = kubectl('port-forward', target, *ports, _bg=True)
    running_port_forwards.append(process)
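# A hedged companion sketch for `start_port_forward`: tear down every tracked
# forward. Assumes `running_port_forwards` is the module-level list populated
# above; `sh` background commands expose `terminate()` and raise a
# `SignalException_SIGTERM` from `wait()` after being terminated.
def stop_port_forwards():
    while running_port_forwards:
        process = running_port_forwards.pop()
        process.terminate()  # sends SIGTERM to the kubectl process
        try:
            process.wait()
        except sh.SignalException_SIGTERM:
            pass  # expected: the process exited via the signal we sent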
def test_nondefault_resources(self, tmp_path):
    """Manage an `autoscaling/v1` and an `autoscaling/v2beta2` HPA at the same time.

    This test is designed to verify that Square will interrogate the
    correct K8s endpoint versions to compute the plan for a resource.
    """
    # Only show INFO and above or otherwise this test will produce a
    # humongous amount of logs from all the K8s calls.
    square.square.setup_logging(2)

    config = Config(
        folder=tmp_path,
        groupby=GroupBy(label="app", order=[]),
        kubecontext=None,
        kubeconfig=Filepath("/tmp/kubeconfig-kind.yaml"),
        selectors=Selectors(kinds={"Namespace", "HorizontalPodAutoscaler"},
                            namespaces=["test-hpa"],
                            labels=[]),
    )

    # Copy the manifest with the namespace and the two HPAs to the temporary path.
    manifests = list(
        yaml.safe_load_all(open("tests/support/k8s-test-hpa.yaml")))
    man_path = tmp_path / "manifest.yaml"
    man_path.write_text(yaml.dump_all(manifests))
    assert len(manifests) == 3

    # ---------------------------------------------------------------------
    # Deploy the resources: one namespace with two HPAs in it. One will be
    # deployed via `autoscaling/v1`, the other via `autoscaling/v2beta2`.
    # ---------------------------------------------------------------------
    sh.kubectl("apply", "--kubeconfig", config.kubeconfig, "-f", str(man_path))

    # ---------------------------------------------------------------------
    # The plan must be empty because Square must have interrogated the
    # correct API endpoints for each HPA.
    # ---------------------------------------------------------------------
    plan_1, err = square.square.make_plan(config)
    assert not err
    assert plan_1.create == plan_1.patch == plan_1.delete == []
    del plan_1

    # ---------------------------------------------------------------------
    # Modify the v2beta2 HPA manifest and verify that Square now wants to
    # patch that resource.
    # ---------------------------------------------------------------------
    # Make a change to the manifest and save it.
    tmp_manifests = copy.deepcopy(manifests)
    assert tmp_manifests[2]["apiVersion"] == "autoscaling/v2beta2"
    tmp_manifests[2]["spec"]["metrics"][0]["external"]["metric"]["name"] = "foo"
    man_path.write_text(yaml.dump_all(tmp_manifests))

    # The plan must report one patch.
    plan_2, err = square.square.make_plan(config)
    assert not err
    assert plan_2.create == plan_2.delete == [] and len(plan_2.patch) == 1
    assert plan_2.patch[0].meta.name == "hpav2beta2"
    assert plan_2.patch[0].meta.apiVersion == "autoscaling/v2beta2"
    del plan_2

    # ---------------------------------------------------------------------
    # Delete both HPAs with Square.
    # ---------------------------------------------------------------------
    # Keep only the namespace manifest and save the file.
    tmp_manifests = copy.deepcopy(manifests[:1])
    man_path.write_text(yaml.dump_all(tmp_manifests))

    # Square must now want to delete both HPAs.
    plan_3, err = square.square.make_plan(config)
    assert not err
    assert plan_3.create == plan_3.patch == [] and len(plan_3.delete) == 2
    assert {_.meta.name for _ in plan_3.delete} == {"hpav1", "hpav2beta2"}
    assert not square.square.apply_plan(config, plan_3)
    del plan_3

    # ---------------------------------------------------------------------
    # Re-create both HPAs with Square.
    # ---------------------------------------------------------------------
    # Restore the original manifest file.
    man_path.write_text(yaml.dump_all(manifests))

    # Create a plan. That plan must want to restore both HPAs.
    plan_4, err = square.square.make_plan(config)
    assert not err
    assert plan_4.delete == plan_4.patch == [] and len(plan_4.create) == 2
    assert {_.meta.name for _ in plan_4.create} == {"hpav1", "hpav2beta2"}
    assert {_.meta.apiVersion for _ in plan_4.create} == {"autoscaling/v1",
                                                          "autoscaling/v2beta2"}
    assert not square.square.apply_plan(config, plan_4)
    del plan_4

    # The plan was applied above, so a fresh plan must now be empty.
    plan_5, err = square.square.make_plan(config)
    assert not err
    assert plan_5.create == plan_5.patch == plan_5.delete == []
    del plan_5

    # ---------------------------------------------------------------------
    # Verify that a change in the `apiVersion` would mean a patch.
    # ---------------------------------------------------------------------
    # Manually change the API version of one of the HPAs.
    tmp_manifests = copy.deepcopy(manifests)
    assert tmp_manifests[1]["apiVersion"] == "autoscaling/v1"
    tmp_manifests[1]["apiVersion"] = "autoscaling/v2beta2"
    man_path.write_text(yaml.dump_all(tmp_manifests))

    # Square must now produce a single non-empty patch.
    plan_6, err = square.square.make_plan(config)
    assert not err
    assert plan_6.delete == plan_6.create == [] and len(plan_6.patch) == 1
    assert plan_6.patch[0].meta.name == "hpav1"
    assert plan_6.patch[0].meta.apiVersion == "autoscaling/v2beta2"
def kubectl(*args, **kwargs):
    try:
        return sh.kubectl(*args, **kwargs)
    except sh.ErrorReturnCode as e:
        log.error(e)
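# Caution when using the wrapper above: on failure it logs the error and
# implicitly returns None, so callers must guard the result, e.g.:
#
#   out = kubectl('get', 'pods', '-o', 'json')
#   if out is not None:
#       pods = json.loads(out.stdout)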
def submit(self):
    try:
        from minio import Minio
        from google.cloud import storage
    except ImportError:
        raise ValueError(
            "Required libraries for kubeflow aren't installed, "
            "run `pip install wandb[kubeflow]`")
    print('Submitting arena {} job 🚀'.format(self.args[0]))
    # TODO: require command?
    opt_index, _ = self._parse_flag("--", len(self.args) - 1)
    name_index, name = self._parse_flag("--name")
    if name_index == -1:
        name = "wandb"
        name_index = len(self.args) - 1
        self.args.insert(name_index, None)
    name = '-'.join([name, _short_id(5)])
    self.args[name_index] = "--name=" + name
    projo = self.wandb_project or self.api.settings("project")
    if projo:
        if "/" in projo:
            ent, projo = projo.split("/")
            self.args.insert(opt_index, "--env=WANDB_ENTITY={}".format(ent))
    else:
        _, git = self._parse_flag("--syncSource")
        _, image = self._parse_flag("--image")
        if git:
            projo = git.split("/")[-1].replace(".git", "")
        elif image:
            projo = image.split(":")[0]
    if projo:
        projo = self.api.format_project(projo)
        self.args.insert(opt_index, "--env=WANDB_PROJECT={}".format(projo))
    if self.wandb_api_key:
        self.args.insert(
            opt_index, "--env=WANDB_API_KEY={}".format(self.wandb_api_key))
    else:
        # Extract the secret, ideally this would be a secret env in the TFjob YAML
        try:
            kube_args = {"o": "json"}
            index, namespace = self._parse_flag("--namespace")
            if namespace:
                kube_args["namespace"] = namespace
            secret = json.loads(
                str(sh.kubectl("get", "secret", "wandb", **kube_args)))
        except sh.ErrorReturnCode:
            secret = {}
        if secret.get("data"):
            print("Found wandb k8s secret, adding to environment")
            api_key = secret["data"].get("api_key")
            if api_key:
                self.args.insert(
                    opt_index, "--env=WANDB_API_KEY=" +
                    base64.b64decode(api_key).decode("utf8"))
                self.wandb_api_key = api_key
    wandb_run_path = None  # guards the `--logdir` check further down
    if self.wandb_api_key:
        try:
            # TODO: support someone overriding entity
            if self.workers <= 1:
                res = self.api.upsert_run(name=self.wandb_run_id,
                                          project=projo)
                wandb_run_path = os.path.join(
                    res["project"]["entity"]["name"],
                    res["project"]["name"], "runs", res["name"])
                print('Run configured with W&B\nview live results here: {}'
                      .format("https://app.wandb.ai/" + wandb_run_path))
                self.args.insert(
                    opt_index, "--env=WANDB_RUN_ID={}".format(res["name"]))
                self.args.insert(opt_index, "--env=WANDB_RESUME=allow")
            else:
                res = self.api.viewer()
                self.args.insert(opt_index, "--env=WANDB_RUN_GROUP=" + name)
                wandb_run_path = os.path.join(res["entity"], projo,
                                              "groups", name)
                print('Distributed run configured with W&B\n'
                      'view live results here: {}'
                      .format("https://app.wandb.ai/" + wandb_run_path))
        except CommError:
            print("Failed to talk to W&B")
    else:
        print("Couldn't authenticate with W&B, "
              "run `wandb login` on your local machine")
    index, gcs_url = self._parse_flag("--logdir")
    tensorboard = self._parse_flag("--tensorboard")[0] > -1
    if gcs_url and wandb_run_path:
        pipeline_metadata(gcs_url, wandb_run_path, tensorboard)
    elif wandb_run_path:
        print("--logdir isn't set, skipping pipeline asset saving.")
    cmd = arena(["submit"] + self.args)
    print("Arena job {} submitted, watching state for up to {} minutes"
          .format(name, self.timeout_minutes))
    total_time = 0
    poll_rate = 10
    while True:
        # TODO: parse JSON when it's supported
        status = str(arena("get", name)).split("\n")
        rows = [
            row for row in (re.split(r"\s+", row) for row in status)
            if len(row) == 6 and "s" in row[3]
        ]
        if len(rows) <= 1:
            print("Final status: ", rows)
            break
        status = [row[1] for row in rows[1:]]
        runtime = [row[3] for row in rows[1:]]
        print("Status: {} {}".format(status, runtime))
        if not all([s in ("PENDING", "RUNNING") for s in status]):
            if not any([s in ("PENDING", "RUNNING") for s in status]):
                print("Job finished with statuses: {}".format(status))
                if any([s == "FAILED" for s in status]):
                    arena("logs", name, _fg=True)
                break
        time.sleep(poll_rate)
        total_time += poll_rate
        if total_time > 90:
            poll_rate = 30
        if total_time > self.timeout_minutes * 60:
            print("Timeout exceeded")
            break
def test_workflow(self, tmp_path):
    """Delete and restore a full namespace with Square.

    We will use `kubectl` to create a new namespace and populate it with
    resources. Then we will use Square to back it up, delete it and
    finally restore it again.
    """
    # Only show INFO and above or otherwise this test will produce a
    # humongous amount of logs from all the K8s calls.
    square.square.setup_logging(2)

    # Define the resource priority and kinds we have in our workflow
    # manifests. Only target the `test-workflow` labels to avoid problems
    # with non-namespaced resources.
    priorities = (
        "Namespace", "Secret", "ConfigMap",
        "ClusterRole", "ClusterRoleBinding",
        "Role", "RoleBinding",
    )
    namespace = "test-workflow"

    config = Config(
        folder=tmp_path / "backup",
        groupby=GroupBy(label="app", order=[]),
        kubecontext=None,
        kubeconfig=Filepath("/tmp/kubeconfig-kind.yaml"),
        priorities=priorities,
        selectors=Selectors(kinds=set(priorities),
                            namespaces=[namespace],
                            labels=["app=test-workflow"]),
    )

    # ---------------------------------------------------------------------
    # Deploy a new namespace with only a few resources. There are no
    # deployments among them to speed up the deletion of the namespace.
    # ---------------------------------------------------------------------
    sh.kubectl("apply", "--kubeconfig", config.kubeconfig, "-f",
               "tests/support/k8s-test-resources.yaml")

    # ---------------------------------------------------------------------
    # Create a plan for "square-tests". The plan must delete all resources
    # because we have not downloaded any manifests yet.
    # ---------------------------------------------------------------------
    plan_1, err = square.square.make_plan(config)
    assert not err
    assert plan_1.create == plan_1.patch == [] and len(plan_1.delete) > 0

    # ---------------------------------------------------------------------
    # Backup all resources. A plan against that backup must be empty.
    # ---------------------------------------------------------------------
    assert not (config.folder / "_other.yaml").exists()
    err = square.square.get_resources(config)
    assert not err and (config.folder / "_other.yaml").exists()

    plan_2, err = square.square.make_plan(config)
    assert not err
    assert plan_2.create == plan_2.patch == plan_2.delete == []

    # ---------------------------------------------------------------------
    # Apply the first plan to delete all resources including the namespace.
    # ---------------------------------------------------------------------
    assert not square.square.apply_plan(config, plan_1)

    # ---------------------------------------------------------------------
    # Wait until K8s has deleted the namespace.
    # ---------------------------------------------------------------------
    for i in range(120):
        time.sleep(1)
        try:
            sh.kubectl("get", "ns", namespace,
                       "--kubeconfig", config.kubeconfig)
        except sh.ErrorReturnCode_1:
            break
    else:
        assert False, f"Could not delete the namespace <{namespace}> in time"

    # ---------------------------------------------------------------------
    # Use the backup manifests to restore the namespace.
    # ---------------------------------------------------------------------
    plan_3, err = square.square.make_plan(config)
    assert not err
    assert plan_3.patch == plan_3.delete == [] and len(plan_3.create) > 0

    # Apply the new plan.
    assert not square.square.apply_plan(config, plan_3)

    plan_4, err = square.square.make_plan(config)
    assert not err
    assert plan_4.create == plan_4.patch == plan_4.delete == []