def spawn_job(self, task, node):
    """
    Spawn a Kubernetes job on a Kubernetes node.

    If a job with the same name already exists, it is deleted first and the
    deletion is waited for before the new job is created.

    Args:
        task (str): Type of job to run (e.g. runxhpl).
        node (str): Name of the node on which to run the job.

    Returns:
        job (V1Job): Spawned job, or None when ``self.worker_yaml`` is not a
            dict and therefore nothing was created.

    Raises:
        FailToCreateError: An error occurred creating the job.
    """
    kube_client = client.api_client.ApiClient()

    exists, existing_job = self.job_exists()
    if exists:
        logger.info("Found existing job: {0}".format(existing_job.metadata.name))
        delete_obj(existing_job)
        self.wait_for_delete()

    logger.info("Creating worker: {0}-{1}".format(task, node))
    if not isinstance(self.worker_yaml, dict):
        # Nothing can be created; never hand back the job that was just
        # deleted as if it were the freshly spawned one.
        return None

    # A FailToCreateError (it wraps a list of ApiException) propagates to the
    # caller unchanged.
    kubeutils.create_from_dict(kube_client, self.worker_yaml)
    return get_job(self.worker_yaml["metadata"]["name"])
def handle_autoscaler(self, name: str):
    """Replace the HorizontalPodAutoscaler for the Deployment called ``name``.

    Does nothing unless ``self.resources`` is truthy and its ``auto_scale``
    entry is truthy. Otherwise an autoscaler reported by
    ``self.find_autoscaler`` is deleted first, and a new ``autoscaling/v1``
    autoscaler targeting the ``apps/v1`` Deployment ``name`` is created.
    Creation failures are only logged at debug level; they are not raised.

    :param name: name shared by the autoscaler and the target Deployment
    """
    if not (self.resources and self.resources.get('auto_scale', False)):
        return

    if self.find_autoscaler(name):
        self.LOG.debug('Removing old autoscaler: {}'.format(name))
        autoscaling_api = k8s_client.AutoscalingV1Api(self.api_client)
        autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
            name=name, namespace=self.namespace)

    self.LOG.info("Creating horizontal Pod autoscaler")
    metadata = {'name': name, 'namespace': self.namespace}
    spec = {
        'minReplicas': self.resources.get('minReplicas', 1),
        'maxReplicas': self.resources.get('maxReplicas', 10),
        'targetCPUUtilizationPercentage': self.resources.get(
            'targetCPUUtilizationPercentage', 50),
        'scaleTargetRef': {
            'apiVersion': 'apps/v1',
            'name': name,
            'kind': 'Deployment',
        },
    }
    template = {
        'apiVersion': 'autoscaling/v1',
        'kind': 'HorizontalPodAutoscaler',
        'metadata': metadata,
        'spec': spec,
    }

    try:
        k8s_utils.create_from_dict(self.api_client, template)
        self.LOG.debug(template)
    except Exception as e:
        self.LOG.debug("Failed to create autoscaler: {}".format(name))
        self.LOG.debug(repr(e))
def start_worker_job(self) -> None:
    """Start one worker as a Kubernetes Pod.

    A fresh worker id is generated for every call and used for the pod name,
    the container name and the work directory, which is mounted from the host
    together with the Docker socket. Errors raised by the Kubernetes client
    are logged, not propagated.
    """
    worker_id: str = uuid.uuid4().hex
    worker_name: str = f'cl-worker-{worker_id}'

    # The work directory must be unique because jobs may share a host.
    prefix: str = self.args.worker_work_dir_prefix or '/tmp/'
    work_dir: str = os.path.join(prefix, f'{worker_name}_work_dir')

    command: List[str] = self.build_command(worker_id, work_dir)
    worker_image: str = 'codalab/worker:' + os.environ.get('CODALAB_VERSION', 'latest')

    resource_limits: Dict[str, Any] = {
        'cpu': self.args.cpus,
        'memory': f'{self.args.memory_mb}Mi',
        'nvidia.com/gpu': self.args.gpus,  # Configure NVIDIA GPUs
    }
    container: Dict[str, Any] = {
        'name': f'{worker_name}-container',
        'image': worker_image,
        'command': command,
        'securityContext': {'runAsUser': 0},  # Run as root
        'env': [
            {'name': 'CODALAB_USERNAME', 'value': self.codalab_username},
            {'name': 'CODALAB_PASSWORD', 'value': self.codalab_password},
        ],
        'resources': {'limits': resource_limits},
        'volumeMounts': [
            {'name': 'dockersock', 'mountPath': '/var/run/docker.sock'},
            {'name': 'workdir', 'mountPath': work_dir},
        ],
    }
    host_volumes = [
        {'name': 'dockersock', 'hostPath': {'path': '/var/run/docker.sock'}},
        {'name': 'workdir', 'hostPath': {'path': work_dir}},
    ]
    pod_config: Dict[str, Any] = {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {'name': worker_name},
        'spec': {
            'containers': [container],
            'volumes': host_volumes,
            'restartPolicy': 'Never',  # Only run a job once
        },
    }

    # Use Kubernetes to start a worker on GCP
    logger.debug('Starting worker {} with image {}'.format(worker_id, worker_image))
    try:
        utils.create_from_dict(self.k8_client, pod_config)
    except (client.ApiException, FailToCreateError) as e:
        logger.error(f'Exception when calling Kubernetes utils->create_from_dict: {e}')
def main():
    """Submit the PySpark Pi example through a driver pod.

    Loads the kube config "kubeconfig-sa", creates the driver pod from
    ``k8s/spark-submit/pyspark-pi-driver-pod.yaml`` and then creates every
    other entry of that directory with an ownerReference pointing at the
    driver pod.
    """
    # Configs can be set in Configuration class directly or using helper utility
    config.load_kube_config("kubeconfig-sa")

    # b2a_hex() returns bytes on Python 3; decode before concatenating with a
    # str, otherwise this line raises TypeError.
    name_suffix = "-" + binascii.b2a_hex(os.urandom(8)).decode("ascii")
    priority_class_name = "routine"
    env_subst = {
        "${NAMESPACE}": "spark-jobs",
        "${SERVICE_ACCOUNT_NAME}": "driver-sa",
        "${DRIVER_NODE_AFFINITIES}": "driver",
        "${EXECUTOR_NODE_AFFINITIES}": "compute",
        "${NAME_SUFFIX}": name_suffix,
        "${PRIORITY_CLASS_NAME}": priority_class_name,
    }

    k8s_client = ApiClient()
    verbose = True

    # Create driver pod
    k8s_dir = os.path.join(os.path.dirname(__file__), "k8s/spark-submit")
    k8s_object_dict = create_k8s_object(
        os.path.join(k8s_dir, "pyspark-pi-driver-pod.yaml"), env_subst)
    pprint(k8s_object_dict)
    k8s_objects = utils.create_from_dict(k8s_client, k8s_object_dict, verbose=verbose)

    # Prepare ownership on dependent objects
    owner_refs = [{
        "apiVersion": "v1",
        "controller": True,
        "kind": "Pod",
        "name": k8s_objects[0].metadata.name,
        "uid": k8s_objects[0].metadata.uid,
    }]

    # List all YAML files in k8s/spark-submit directory, except the driver pod definition file
    other_resources = listdir(k8s_dir)
    other_resources.remove("pyspark-pi-driver-pod.yaml")
    for f in other_resources:
        k8s_object_dict = create_k8s_object(os.path.join(k8s_dir, f), env_subst)
        # Set ownership
        k8s_object_dict["metadata"]["ownerReferences"] = owner_refs
        pprint(k8s_object_dict)
        utils.create_from_dict(k8s_client, k8s_object_dict, verbose=verbose)

    print("Submitted %s" % (k8s_objects[0].metadata.labels["app-name"]))
def test_create_apps_deployment_from_yaml_obj(self):
    """Create a Deployment from a parsed YAML object and read it back.

    The manifest is loaded from ``apps-deployment.yaml``, renamed to
    ``nginx-app-3`` and created with ``utils.create_from_dict``.
    """
    k8s_client = client.api_client.ApiClient(configuration=self.config)
    with open(self.path_prefix + "apps-deployment.yaml") as f:
        yml_obj = yaml.safe_load(f)
    yml_obj["metadata"]["name"] = "nginx-app-3"

    utils.create_from_dict(k8s_client, yml_obj)

    app_api = client.AppsV1Api(k8s_client)
    try:
        dep = app_api.read_namespaced_deployment(name="nginx-app-3",
                                                 namespace="default")
        self.assertIsNotNone(dep)
    finally:
        # Always clean up, so a failed read/assert does not leave the
        # deployment behind and break the next run with a name conflict.
        app_api.delete_namespaced_deployment(name="nginx-app-3",
                                             namespace="default",
                                             body={})
async def _create_external_deployment(api_client, app_client, docker_images, tmpdir):
    """Dump an 'external-deployment' to YAML files and apply them to the cluster.

    The Kubernetes objects produced by ``K8sDeploymentConfig.to_k8s_yaml()``
    are written to ``<tmpdir>/external-deployment/<name>.yml`` (several
    objects in one file are separated by ``---``). The namespace
    ``external-deployment-ns`` and then every written file are created on a
    best-effort basis: creation errors are ignored.

    :param api_client: Kubernetes ApiClient used for all create calls
    :param app_client: not used by this function
    :param docker_images: sequence whose first element is used as the image
    :param tmpdir: directory under which the YAML files are written
    """
    namespace = 'external-deployment-ns'
    args = set_deployment_parser().parse_args([
        '--uses', f'docker://{docker_images[0]}',
        '--name', 'external-deployment',
    ])
    external_deployment_config = K8sDeploymentConfig(args=args, k8s_namespace=namespace)
    configs = external_deployment_config.to_k8s_yaml()

    deployment_base = os.path.join(tmpdir, 'external-deployment')
    os.makedirs(deployment_base, exist_ok=True)

    filenames = []
    for name, k8s_objects in configs:
        filename = os.path.join(deployment_base, f'{name}.yml')
        with open(filename, 'w+') as fp:
            filenames.append(filename)
            for i, k8s_object in enumerate(k8s_objects):
                yaml.dump(k8s_object, fp)
                if i < len(k8s_objects) - 1:
                    fp.write('---\n')

    from kubernetes import utils

    namespace_object = {
        'apiVersion': 'v1',
        'kind': 'Namespace',
        'metadata': {'name': f'{namespace}'},
    }
    try:
        utils.create_from_dict(api_client, namespace_object)
    except Exception:
        # Best effort (e.g. the namespace may already exist). Only Exception
        # is swallowed so KeyboardInterrupt/SystemExit still propagate.
        pass

    for filename in filenames:
        try:
            utils.create_from_yaml(
                api_client,
                yaml_file=filename,
                namespace=namespace,
            )
        except Exception:
            # Best effort, same reasoning as for the namespace above.
            pass

    await asyncio.sleep(1.0)
def create_from_yaml(k8s_client, yaml_file, verbose=False, namespace="default", **kwargs):
    """
    Create Kubernetes objects from already-parsed YAML documents.

    Despite its name, ``yaml_file`` is not a path and is not parsed here: it
    is iterated directly and every item is handed to
    ``utils.create_from_dict``. The caller is expected to have loaded the
    documents beforehand (for example with ``yaml.safe_load_all``).

    Input:
    yaml_file: iterable of dicts, one per YAML document. ``None`` entries
        (empty documents) are skipped.
    k8s_client: an ApiClient object, initialized with the client args.
    verbose: If True, print confirmation from the create action.
        Default is False.
    namespace: string. Contains the namespace to create all
        resources inside. The namespace must preexist otherwise
        the resource creation will fail. If the API object in
        the yaml file already contains a namespace definition
        this parameter has no effect.

    Available parameters for creating <kind>:
    :param async_req bool
    :param bool include_uninitialized: If true, partially initialized
        resources are included in the response.
    :param str pretty: If 'true', then the output is pretty printed.
    :param str dry_run: When present, indicates that modifications
        should not be persisted. An invalid or unrecognized dryRun
        directive will result in an error response and no further
        processing of the request.
        Valid values are: - All: all dry run stages will be processed

    Returns:
        A list with one entry per non-empty document: the value returned by
        ``utils.create_from_dict`` for that document.

    Raises:
        FailToCreateError which holds list of `client.rest.ApiException`
        instances for each object that failed to create. All documents are
        attempted before the error is raised.
    """
    failures = []
    k8s_objects = []
    for yml_document in yaml_file:
        if yml_document is None:
            # An empty YAML document (e.g. after a trailing '---') loads as
            # None and cannot be created.
            continue
        try:
            created = utils.create_from_dict(k8s_client, yml_document, verbose,
                                             namespace=namespace, **kwargs)
            k8s_objects.append(created)
        except utils.FailToCreateError as failure:
            failures.extend(failure.api_exceptions)
    if failures:
        raise utils.FailToCreateError(failures)
    return k8s_objects
def create_objects_from_dict(self, filepath, namespace=None):
    """Create kubernetes object from a yaml encapsulated inside a dictionary.

    Every manifest obtained from ``Parser(filepath).return_manifests_dict`` is
    created with ``utils.create_from_dict``. When ``namespace`` is given it is
    injected into each manifest's metadata first.

    If creation fails because the Kubernetes client has no
    ``NetworkingIstioIoV1alpha3Api``, the whole file is applied once with
    ``kubectl apply`` and processing stops. Any other error is passed to
    ``self.check_create_error_and_response``.

    :param filepath: path of the manifest file
    :param namespace: optional namespace to force onto every manifest
    """
    yaml_objects = Parser(filepath).return_manifests_dict
    for manifest in yaml_objects:
        try:
            # handle special cases of namespace injection
            if namespace:
                manifest["metadata"]["namespace"] = namespace
            utils.create_from_dict(self.api_client, manifest)
            logger.info('Created {}/{}'.format(
                manifest["kind"], manifest["metadata"]["name"]))
        except Exception as e:
            # AttributeError: module 'kubernetes.client' has no attribute 'NetworkingIstioIoV1alpha3Api'
            if "module 'kubernetes.client' has no attribute 'NetworkingIstioIoV1alpha3Api'" in str(e):
                logger.warning("Creating {} failed.".format(manifest["kind"]))
                logger.info("Trying again using kubectl...")
                kubectl_cmd = "kubectl apply -f {}".format(filepath)
                if namespace:
                    # Without this guard the command ended in "-n None".
                    kubectl_cmd += " -n {}".format(namespace)
                # NOTE(review): the command is built by string formatting;
                # confirm filepath/namespace never come from untrusted input.
                exec_cmd(kubectl_cmd)
                # kubectl applied the whole file, so the remaining manifests
                # must not be created again.
                break
            self.check_create_error_and_response(
                e, manifest["kind"], manifest["metadata"]["name"])
def createResourceFromYaml(self, filepath, namespace='default'):
    """Create every object described in a (multi-document) YAML file.

    For each non-empty document, a ``getNamespaced<Kind>`` or ``get<Kind>``
    method of this object (if one exists) is asked for the object first; when
    its answer has ``ret_code`` 0 that answer is recorded under
    ``created_objs`` and nothing is created. Otherwise the document is created
    with ``create_from_dict``; documents whose creation raises are recorded
    under ``failed_objs``.

    :param filepath: path of the YAML file
    :param namespace: namespace used for lookups and creation
    :return: ``{'ret_code': 0, 'result': {...}}`` when the file was processed,
        ``{'ret_code': 1, 'result': <message>}`` when the file does not exist
        or processing raised
    """
    absolute_path = os.path.abspath(filepath)
    if not os.path.isfile(absolute_path):
        return {"ret_code": 1, 'result': "file %s not exists" % (filepath, )}

    created = []
    failed = []
    outcome = {"created_objs": created, "failed_objs": failed}

    with open(absolute_path, mode='rb') as stream:
        try:
            for document in yaml.safe_load_all(stream):
                if not document:
                    continue
                kind = document['kind']
                object_name = document['metadata']['name']
                print('current kind: ' + str(kind) + ' name: ' + str(object_name))

                namespaced_getter = 'getNamespaced' + kind
                cluster_getter = 'get' + kind
                existing = None
                if hasattr(self, namespaced_getter):
                    existing = getattr(self, namespaced_getter)(
                        name=object_name, namespace=namespace)
                elif hasattr(self, cluster_getter):
                    existing = getattr(self, cluster_getter)(name=object_name)

                # A getter was called and reported success: record its answer
                # instead of creating the object.
                if existing is not None and existing['ret_code'] == 0:
                    created.append(existing)
                    continue

                try:
                    response = create_from_dict(
                        k8s_client=client.ApiClient(),
                        data=document,
                        namespace=namespace)
                    created.append(response)
                except Exception as e:
                    print(str(e))
                    failed.append(document)
            return {'ret_code': 0, 'result': outcome}
        except Exception as e:
            print(str(e))
            return {'ret_code': 1, 'result': str(e)}
def apply_GSelectedCluster_yaml(self, topicData):
    """Apply the YAML referenced by a request and report the outcome.

    ``topicData['msg']`` must contain ``requestID``, ``fileID`` and
    ``requestData``; ``topicData['target']`` must contain ``type`` and
    ``object``. A missing ``requestID``/``fileID`` only prints a message;
    any other missing key is reported through ``self.send_error``.

    The YAML text is fetched with ``self.get_yaml_file_from_redis(fileID)``,
    parsed and created with ``utils.create_from_dict``. The result sent with
    ``self.send_result`` is ``'success'``, ``'fail'`` (empty YAML or any error
    while parsing/creating) or ``'cancel'`` (no YAML text was found).
    """
    print('start-----------------apply_GSelectedCluster_yaml')
    if 'requestID' not in topicData['msg']:
        print('requestID not in topicData[msg]')
        return
    print('1')
    requestID = topicData['msg']['requestID']
    if 'fileID' not in topicData['msg']:
        print('fileID not in topicData[msg]')
        return
    print('1')
    fileID = topicData['msg']['fileID']
    print('2')
    if 'type' not in topicData['target']:
        self.send_error(requestID, 'type not in topicData[target]')
        return
    print('3')
    if 'object' not in topicData['target']:
        self.send_error(requestID, 'object not in topicData[target]')
        return
    print('4')
    if 'requestData' not in topicData['msg']:
        self.send_error(requestID, 'requestData not in topicData[msg]')
        return
    print('end---------------------apply_GSelectedCluster_yaml')

    yaml_file = self.get_yaml_file_from_redis(fileID)
    print('5')
    print('topicData[msg][requestData]', topicData['msg']['requestData'])
    # Example of topicData['msg']['requestData']:
    # {'requestID': 'req-b9494ca5-6e9a-4ab3-8392-8795f0b5eb3e',
    #  'date': '2021-10-21 12:05:54', 'status': 'create',
    #  'fileID': 'b2ab5fbe-e7bf-44dc-84d7-b969ad62f104', 'failCnt': 0,
    #  'env': {'type': 'global', 'targetClusters': ['c1', 'c2', 'c3'],
    #          'priority': 'GSelectedCluster'}}

    result = 'cancel'
    if yaml_file is not None:
        # An earlier variant applied only pod/deployment YAML through
        # self.apply_yaml(); every kind is now created via create_from_dict.
        try:
            print('6')
            # NOTE(review): FullLoader on text fetched from redis; prefer
            # yaml.safe_load unless the stored YAML is fully trusted.
            yaml_dic = yaml.load(yaml_file, Loader=yaml.FullLoader)
            print('7')
            if yaml_dic is not None:
                print('8')
                utils.create_from_dict(k8s_client, yaml_dic)
                print('create_from_yaml is completed ', yaml_file)
                result = 'success'
                print('9')
            else:
                print('10')
                result = 'fail'
        except Exception as err:
            # The handler used to print an undefined name (full_filename),
            # which raised NameError and prevented the 'fail' result from
            # ever being sent.
            print("create_from_yaml", fileID, " Failed.", repr(err))
            result = 'fail'
    else:
        print('error : yaml file read ')
        result = 'cancel'

    temp_msg = {
        'source': {'type': 'cluster', 'object': self.cluster_name},
        'target': {'type': 'none'},
        'hcode': 400,
        'lcode': 2,
        'msg': {'result': result},
    }
    print('11')
    self.send_result(requestID, temp_msg)
    print('12')
def apply_manifest(self, manifest):
    """Create the objects described by ``manifest`` in the namespace ``self.name``.

    Delegates to ``utils.create_from_dict`` with ``self.api.client`` and
    returns whatever that call returns.
    """
    api_client = self.api.client
    created = utils.create_from_dict(api_client, manifest, namespace=self.name)
    return created
def main():
    """Submit the PySpark Pi example as a SparkApplication custom object.

    Loads the kube config "kubeconfig-sa", creates the SparkApplication from
    ``k8s/spark-operator/pyspark-pi.yaml`` in the ``spark-jobs`` namespace,
    reads it back, and creates the UI ingress from
    ``k8s/spark-operator/pyspark-pi-ui-ingress.yaml`` with an ownerReference
    pointing at the SparkApplication.
    """
    # Configs can be set in Configuration class directly or using helper utility
    config.load_kube_config("kubeconfig-sa")

    namespace = "spark-jobs"
    # b2a_hex() returns bytes on Python 3; decode before concatenating with a
    # str, otherwise this line raises TypeError.
    name_suffix = "-" + binascii.b2a_hex(os.urandom(8)).decode("ascii")
    priority_class_name = "routine"
    env_subst = {
        "${NAMESPACE}": namespace,
        "${SERVICE_ACCOUNT_NAME}": "driver-sa",
        "${DRIVER_NODE_AFFINITIES}": "driver",
        "${EXECUTOR_NODE_AFFINITIES}": "compute",
        "${NAME_SUFFIX}": name_suffix,
        "${PRIORITY_CLASS_NAME}": priority_class_name,
    }

    custom_object_api = client.CustomObjectsApi()

    # Create pod
    yaml_file = os.path.join(os.path.dirname(__file__), "k8s/spark-operator/pyspark-pi.yaml")
    spark_app = create_k8s_object(yaml_file, env_subst)
    pprint(spark_app)

    # create the resource
    group = "sparkoperator.k8s.io"
    version = "v1beta2"
    plural = "sparkapplications"

    custom_object_api.create_namespaced_custom_object(
        group=group,
        version=version,
        namespace=namespace,
        plural=plural,
        body=spark_app,
    )
    print("Resource created")

    # get the resource and print out data
    resource = custom_object_api.get_namespaced_custom_object(
        group=group,
        version=version,
        name="pyspark-pi-%s%s" % (priority_class_name, name_suffix),
        namespace=namespace,
        plural=plural,
    )
    print("Resource details:")
    pprint(resource)

    # Hijack the auto-created UI service and change its type from ClusterIP to NodePort
    # app_name = resource["metadata"]["name"]
    # ui_service_name = app_name + "-ui-svc"
    # core_v1_api = client.CoreV1Api()
    #
    # w = watch.Watch()
    # field_selector = "metadata.name=%s" % ui_service_name
    # for event in w.stream(core_v1_api.list_namespaced_service, namespace=namespace,
    #                       field_selector=field_selector,
    #                       timeout_seconds=30):
    #     ui_svc = event['object']
    #     if ui_svc:
    #         w.stop()
    #     else:
    #         print("Event: UI service not yet available")
    #
    # ui_svc.spec.type = "NodePort"
    # core_v1_api.patch_namespaced_service(name=ui_service_name, namespace="spark-jobs", body=ui_svc)

    # Create ingress
    # Prepare ownership on dependent objects
    owner_refs = [{
        "apiVersion": "sparkoperator.k8s.io/v1beta2",
        "controller": True,
        "kind": "SparkApplication",
        "name": resource["metadata"]["name"],
        "uid": resource["metadata"]["uid"],
    }]

    yaml_file = os.path.join(os.path.dirname(__file__), "k8s/spark-operator/pyspark-pi-ui-ingress.yaml")
    k8s_object_dict = create_k8s_object(yaml_file, env_subst)
    # Set ownership
    k8s_object_dict["metadata"]["ownerReferences"] = owner_refs
    pprint(k8s_object_dict)
    k8s_client = ApiClient()
    utils.create_from_dict(k8s_client, k8s_object_dict, verbose=True)
async def create_all_flow_deployments_and_wait_ready(
    flow_dump_path,
    namespace,
    api_client,
    app_client,
    core_client,
    deployment_replicas_expected,
    logger,
):
    """Apply a dumped flow to the cluster and wait until it is fully ready.

    Steps:
      1. create the (lower-cased) namespace, best effort, and wait until it
         is listed;
      2. apply every file found in every sub-directory of ``flow_dump_path``;
         failures are logged and ignored;
      3. wait until the number of pods in the namespace equals the sum of the
         expected replicas;
      4. wait until every deployment reports its expected ``ready_replicas``.

    None of the wait loops has a timeout.

    :param flow_dump_path: directory with one sub-directory per deployment
    :param namespace: namespace name (lower-cased before use)
    :param api_client: Kubernetes ApiClient used for the create calls
    :param app_client: client used to list/read deployments
    :param core_client: client used to list namespaces and pods
    :param deployment_replicas_expected: dict of deployment name -> replicas
    :param logger: logger used for progress messages
    """
    from kubernetes import utils

    namespace = namespace.lower()
    namespace_object = {
        'apiVersion': 'v1',
        'kind': 'Namespace',
        'metadata': {'name': f'{namespace}'},
    }
    try:
        logger.info(f'create Namespace {namespace}')
        utils.create_from_dict(api_client, namespace_object)
    except Exception as e:
        # Best effort: the loop below verifies that the namespace exists.
        # Only Exception is swallowed so cancellation/interrupts propagate.
        logger.info(f'Did not create Namespace {namespace} due to {e}')

    while True:
        ns_items = core_client.list_namespace().items
        if any(item.metadata.name == namespace for item in ns_items):
            logger.info(f'created Namespace {namespace}')
            break
        logger.info(f'waiting for Namespace {namespace}')
        await asyncio.sleep(1.0)

    deployment_set = set(os.listdir(flow_dump_path))
    for deployment_name in deployment_set:
        file_set = set(os.listdir(os.path.join(flow_dump_path, deployment_name)))
        for file in file_set:
            try:
                utils.create_from_yaml(
                    api_client,
                    yaml_file=os.path.join(flow_dump_path, deployment_name, file),
                    namespace=namespace,
                )
            except Exception as e:
                # some objects are not successfully created since they exist from previous files
                logger.info(
                    f'Did not create ressource from {file} for pod {deployment_name} due to {e} '
                )

    # wait for all the pods to be created
    expected_deployments = sum(deployment_replicas_expected.values())
    while True:
        namespaced_pods = core_client.list_namespaced_pod(namespace)
        if (namespaced_pods.items is not None
                and len(namespaced_pods.items) == expected_deployments):
            break
        logger.info(
            f'Waiting for all {expected_deployments} Deployments to be created, only got {len(namespaced_pods.items) if namespaced_pods.items is not None else None}'
        )
        await asyncio.sleep(1.0)

    # wait for every deployment to report the expected number of ready replicas
    resp = app_client.list_namespaced_deployment(namespace=namespace)
    deployment_names = set([item.metadata.name for item in resp.items])
    assert deployment_names == set(deployment_replicas_expected.keys())
    while len(deployment_names) > 0:
        deployments_ready = []
        for deployment_name in deployment_names:
            api_response = app_client.read_namespaced_deployment(
                name=deployment_name, namespace=namespace)
            expected_num_replicas = deployment_replicas_expected[deployment_name]
            if (api_response.status.ready_replicas is not None
                    and api_response.status.ready_replicas == expected_num_replicas):
                logger.info(f'Deploymnt {deployment_name} is now ready')
                deployments_ready.append(deployment_name)
            else:
                logger.info(
                    f'Deploymnt {deployment_name} is not ready yet: ready_replicas is {api_response.status.ready_replicas} not equal to {expected_num_replicas}'
                )
        # Remove after the scan: the set must not change while it is iterated.
        for deployment_name in deployments_ready:
            deployment_names.remove(deployment_name)
        logger.info(f'Waiting for {deployment_names} to be ready')
        await asyncio.sleep(1.0)
async def test_process_up_down_events(docker_images):
    """Check that the connection pool follows a deployment being scaled.

    A single-replica ``dummy-deployment`` is created, then scaled 1 -> 2 -> 0.
    At every stage, once the deployment reports the expected number of ready
    replicas, the number of connections the pool holds for
    ``some-deployment`` must equal that number.
    """
    from kubernetes import client
    from kubernetes import utils

    k8s_client = client.ApiClient()
    app_client = client.AppsV1Api(api_client=k8s_client)
    core_client = client.CoreV1Api(api_client=k8s_client)
    namespace = f'pool-test-namespace-{docker_images[0][0:4]}'
    namespace_object = {
        'apiVersion': 'v1',
        'kind': 'Namespace',
        'metadata': {'name': f'{namespace}'},
    }
    try:
        utils.create_from_dict(k8s_client, namespace_object)
    except Exception:
        # Best effort (e.g. the namespace may be left over from an earlier
        # run). Only Exception is swallowed so interrupts still propagate.
        pass

    container_args = ['executor', '--native', '--port-in', '8081']
    if 'test-executor' in docker_images[0]:
        container_args.extend(['--uses', 'config.yml'])

    deployment_object = {
        'apiVersion': 'apps/v1',
        'kind': 'Deployment',
        'metadata': {'name': 'dummy-deployment', 'namespace': f'{namespace}'},
        'spec': {
            'replicas': 1,
            'strategy': {
                'type': 'RollingUpdate',
                'rollingUpdate': {'maxSurge': 1, 'maxUnavailable': 0},
            },
            'selector': {'matchLabels': {'app': 'dummy-deployment'}},
            'template': {
                'metadata': {
                    'labels': {
                        'app': 'dummy-deployment',
                        'jina_deployment_name': 'some-deployment',
                        'shard_id': '4',
                        'pod_type': 'WORKER',
                        'ns': f'{namespace}',
                    }
                },
                'spec': {
                    'containers': [{
                        'name': 'executor',
                        'image': docker_images[0],
                        'command': ['jina'],
                        'args': container_args,
                        'ports': [{'containerPort': 8081}],
                        'readinessProbe': {
                            'tcpSocket': {'port': 8081},
                            'initialDelaySeconds': 5,
                            'periodSeconds': 10,
                        },
                    }]
                },
            },
        },
    }
    utils.create_from_dict(k8s_client, deployment_object, namespace=namespace)

    pool = K8sGrpcConnectionPool(namespace=namespace, client=core_client)
    pool.start()
    try:
        await asyncio.sleep(1.0)
        namespaced_pods = core_client.list_namespaced_pod(namespace)
        while not namespaced_pods.items:
            await asyncio.sleep(1.0)
            namespaced_pods = core_client.list_namespaced_pod(namespace)

        assigned_pod_ip = namespaced_pods.items[0].status.pod_ip
        for container in namespaced_pods.items[0].spec.containers:
            if container.name == 'executor':
                assigned_port = container.ports[0].container_port
                break

        expected_replicas = 1
        while True:
            api_response = app_client.read_namespaced_deployment(
                name='dummy-deployment', namespace=namespace)
            ready_replicas = api_response.status.ready_replicas
            stage_reached = (
                (ready_replicas is not None and ready_replicas == expected_replicas)
                or (ready_replicas is None and expected_replicas == 0)
            )
            if not stage_reached:
                await asyncio.sleep(1.0)
                continue

            replica_lists = pool._connections.get_replicas_all_shards('some-deployment')
            assert expected_replicas == sum([
                len(replica_list.get_all_connections())
                for replica_list in replica_lists
            ])

            if expected_replicas == 1:
                # NOTE(review): the result of has_connection() is discarded,
                # so this line checks nothing. It is left un-asserted because
                # assigned_pod_ip is read as soon as the pod is listed and may
                # not be set yet; confirm before turning it into an assert.
                replica_lists[0].has_connection(f'{assigned_pod_ip}:{assigned_port}')
                # scale up to 2 replicas
                app_client.patch_namespaced_deployment_scale(
                    'dummy-deployment',
                    namespace=namespace,
                    body={'spec': {'replicas': 2}},
                )
                expected_replicas += 1
            elif expected_replicas == 2:
                # scale down by 2 replicas
                app_client.patch_namespaced_deployment_scale(
                    'dummy-deployment',
                    namespace=namespace,
                    body={'spec': {'replicas': 0}},
                )
                expected_replicas = 0
            else:
                break
    finally:
        # Close the pool even when an assertion above fails.
        await pool.close()
async def create_all_flow_deployments_and_wait_ready(flow_dump_path, namespace,
                                                     api_client, app_client,
                                                     core_client):
    """Apply the dumped gateway/slow-process-executor flow and wait for it.

    The dump must contain exactly ``gateway/gateway.yml`` and
    ``slow_process_executor/slow-process-executor.yml``. After applying them
    (best effort), the function waits until the namespace holds 4 pods and
    until ``gateway`` has 1 and ``slow-process-executor`` has 3 ready
    replicas. The wait loops have no timeout.

    :param flow_dump_path: directory with one sub-directory per deployment
    :param namespace: namespace to create and deploy into
    :param api_client: Kubernetes ApiClient used for the create calls
    :param app_client: client used to list/read deployments
    :param core_client: client used to list pods
    """
    from kubernetes import utils

    namespace_object = {
        'apiVersion': 'v1',
        'kind': 'Namespace',
        'metadata': {'name': f'{namespace}'},
    }
    try:
        utils.create_from_dict(api_client, namespace_object)
    except Exception:
        # Best effort (e.g. the namespace may already exist). Only Exception
        # is swallowed so cancellation/interrupts still propagate.
        pass

    deployment_set = set(os.listdir(flow_dump_path))
    assert deployment_set == {'gateway', 'slow_process_executor'}
    for deployment_name in deployment_set:
        file_set = set(os.listdir(os.path.join(flow_dump_path, deployment_name)))
        if deployment_name == 'gateway':
            assert file_set == {'gateway.yml'}
        else:
            assert file_set == {
                'slow-process-executor.yml',
            }
        for file in file_set:
            try:
                utils.create_from_yaml(
                    api_client,
                    yaml_file=os.path.join(flow_dump_path, deployment_name, file),
                    namespace=namespace,
                )
            except Exception:
                # some objects are not successfully created since they exist from previous files
                pass

    # wait for all the pods to be created
    while True:
        namespaced_pods = core_client.list_namespaced_pod(namespace)
        if namespaced_pods.items is not None and len(namespaced_pods.items) == 4:
            break
        await asyncio.sleep(1.0)

    # wait for every deployment to report the expected number of ready replicas
    resp = app_client.list_namespaced_deployment(namespace=namespace)
    deployment_names = set([item.metadata.name for item in resp.items])
    assert deployment_names == {
        'gateway',
        'slow-process-executor',
    }
    expected_replicas = {
        'gateway': 1,
        'slow-process-executor': 3,
    }
    while len(deployment_names) > 0:
        deployments_ready = []
        for deployment_name in deployment_names:
            api_response = app_client.read_namespaced_deployment(
                name=deployment_name, namespace=namespace)
            expected_num_replicas = expected_replicas[deployment_name]
            if (api_response.status.ready_replicas is not None
                    and api_response.status.ready_replicas == expected_num_replicas):
                deployments_ready.append(deployment_name)
        # Remove after the scan: the set must not change while it is iterated.
        for deployment_name in deployments_ready:
            deployment_names.remove(deployment_name)
        await asyncio.sleep(1.0)
def job_create(yaml_output, namespace):
    """Render the build job template and create the job on the cluster.

    The job name is derived from the BUILD_ID, ARCH, BUILD_ENVIRONMENT and
    DEFCONFIG environment variables (DEFCONFIG may be ``<defconfig>+<frag>``).
    CPU and memory requests are derived from K8S_CPU_LIMIT (default 8) and,
    when set, PARALLEL_BUILDS, which is also exported as PARALLEL_JOPT.

    The process exits with status 1 when the job cannot be created.

    :param yaml_output: optional path; when truthy the rendered job YAML is
        also written to this file
    :param namespace: namespace in which the job is created
    :return: the name of the started job
    """
    d = os.getenv('DEFCONFIG').split('+')
    defconfig = d[0]
    frag = d[1] if len(d) > 1 else None

    job_name = 'build-j{}-{}-{}-{}'.format(
        os.getenv('BUILD_ID'),
        os.getenv('ARCH'),
        os.getenv('BUILD_ENVIRONMENT'),
        defconfig,
    )
    if frag:
        frag = os.path.splitext(os.path.basename(frag))[0]
        job_name += "-{}".format(frag)

    # job name can only have '-'
    job_name = re.sub(r'[\.:/_+=]', '-', job_name).lower()
    # k8s limits job-name to max 63 chars (and be sure it doesn't end with '-')
    job_name = job_name[0:63].rstrip('-')

    # FIXME: needs to be tweaked according to k8s cluster VMs
    cpu_limit = int(os.getenv('K8S_CPU_LIMIT', 8))
    parallel_builds = os.getenv('PARALLEL_BUILDS')
    if parallel_builds:
        parallel_builds = int(parallel_builds)
        cpu_limit = min(cpu_limit, parallel_builds)
        os.environ['PARALLEL_JOPT'] = "{}".format(parallel_builds)

    if cpu_limit < 8:
        cpu_request = cpu_limit * 0.875
    # HACK: Azure nodes with 32 vCPUs refuse jobs with
    #       CPU request > 30.  Support ticket open with
    #       Azure
    elif cpu_limit == 32:
        cpu_request = 30
    else:
        cpu_request = cpu_limit - 0.9

    # VMs are generous, let's be greedy and ask for 1Gb per core :)
    mem_request = cpu_limit

    params = {
        'job_name': job_name,
        'cpu_limit': cpu_limit,
        'cpu_request': cpu_request,
        'mem_request': "{}Gi".format(mem_request),
    }
    env = Environment(loader=FileSystemLoader(['config/k8s']),
                      extensions=["jinja2.ext.do"])
    env.filters['env_override'] = env_override
    template = env.get_template("job-build.jinja2")
    job_yaml_text = template.render(params)

    if yaml_output:
        # The message had no placeholder, so the path was never printed.
        print("Writing job to {}".format(yaml_output))
        with open(yaml_output, "w") as fp:
            fp.write(job_yaml_text)

    # Translate the parsed YAML into a k8s job
    job_dict = yaml.safe_load(job_yaml_text)
    try:
        k8s_client = client.ApiClient()
        utils.create_from_dict(k8s_client, data=job_dict, namespace=namespace)
    except utils.FailToCreateError as e:
        print("Failed to create job: ", e)
        sys.exit(1)

    print("Started job {}".format(job_name))
    return job_name
def create_from_dict(self, dictionary):
    """Create the Kubernetes objects described by ``dictionary``.

    Delegates to ``utils.create_from_dict`` with ``self._client`` and returns
    its result. API errors are handed to ``self._raise_runtime_error``.

    ``utils.create_from_dict`` reports creation failures as
    ``utils.FailToCreateError`` (a wrapper around a list of ApiException),
    which is not an ApiException itself, so that case is handled explicitly:
    the first wrapped ApiException is passed to ``self._raise_runtime_error``.
    NOTE(review): this assumes ``_raise_runtime_error`` raises; if it returns,
    the original FailToCreateError is re-raised as before.
    """
    try:
        return utils.create_from_dict(self._client, dictionary)
    except utils.FailToCreateError as e:
        for api_exception in e.api_exceptions:
            self._raise_runtime_error(api_exception)
        raise
    except ApiException as e:
        self._raise_runtime_error(e)
def updateTopology(cls, source: str, info: dict, nodes: dict):
    """Deploy an application, capture its traffic and add it to the topology.

    Steps, all followed by ``cls._cleanEnvironment()`` (also on error):
      1. add already deployed ingress controllers to ``nodes``;
      2. deploy every YAML document found in the files under ``source``,
         writing the prepared document to ``cls.config['modDeploymentFiles']``;
      3. wait until the pods whose hostname is in ``nodes`` are running/ready;
      4. start ``tshark`` in the monitoring container of every monitored pod,
         optionally run the test module ``info['test']``, and wait
         ``info['time']`` seconds;
      5. copy the capture files to ``cls.config['monitoringFiles']`` and turn
         the outgoing packets into edges and communications;
      6. drop the controllers added in step 1 (and their services) that have
         no outgoing edge.

    NOTE(review): the first parameter is ``cls``; the ``@classmethod``
    decorator is presumably outside this view.

    :param source: location passed to ``cls._listFiles`` to find the YAML files
    :param info: configuration; stored as ``cls.config``
    :param nodes: topology nodes by name; updated in place
    :raises DeploymentError: a document could not be created
    :raises MonitoringError: the capture could not be started in a pod
    :raises TestError: importing or running the test module failed
    """
    cls.config = info
    config.load_kube_config()
    configuration.assert_hostname = False
    k8sClient = client.ApiClient()
    loader = YAML(typ='safe')
    files = cls._listFiles(source)
    newNodes = []

    def isMonitored(pod):
        # Same precedence as the original inline condition:
        # A or (B and C) -- the 'Running' check only guards the annotation
        # branch. NOTE(review): confirm this is intended.
        annotations = pod.metadata.annotations
        return pod.spec.hostname in nodes or (
            (annotations and 'archMinerName' in annotations
             and annotations['archMinerName'] in nodes)
            and pod.status.phase == 'Running')

    def dumpName(pod):
        # Name of the capture file: the hostname when it is a known node,
        # otherwise the archMinerName annotation.
        if pod.spec.hostname in nodes:
            return pod.spec.hostname
        return pod.metadata.annotations['archMinerName']

    try:
        #Search ingress controller already deployed
        cls.controllers = cls._searchIngressControllers()
        # Iterate over a copy: removing from the list being iterated skips
        # the element that follows every removed one.
        for controller in list(cls.controllers):
            newNode = cls._addControllerToTopology(controller, nodes)
            if newNode:
                newNodes.append(newNode)
            else:
                cls.controllers.remove(controller)

        #Deployment of the application
        print(' Deploying the application...')
        i = 0
        for k8sFile in files:
            yamls = re.split('^---\n', cls._readFile(k8sFile), flags=re.MULTILINE)
            for contentStr in yamls:
                contentDict = loader.load(contentStr)
                if not contentDict:
                    continue
                cls._prepareYaml(contentStr, contentDict)
                with open(join(cls.config['modDeploymentFiles'], str(i) + '.yml'), 'w') as f:
                    try:
                        f.write(yaml.dump(contentDict))
                        utils.create_from_dict(k8sClient, contentDict)
                    except utils.FailToCreateError:
                        # NOTE(review): the finally clause below cleans up
                        # again; kept because _cleanEnvironment is not
                        # visible here.
                        cls._cleanEnvironment()
                        raise DeploymentError('Error deploying ' + k8sFile)
                i = i + 1

        #Wait until the deployment is completed
        v1 = client.CoreV1Api()
        deploymentCompleted = False
        while not deploymentCompleted:
            pods = v1.list_pod_for_all_namespaces(watch=False)
            deploymentCompleted = True
            for pod in pods.items:
                if pod.spec.hostname in nodes:
                    if pod.status.phase != 'Running' and pod.status.phase != 'Succeeded':
                        deploymentCompleted = False
                        break
                    for containerStatus in pod.status.container_statuses:
                        if not containerStatus.ready:
                            deploymentCompleted = False
                            break
                    if not deploymentCompleted:
                        break
            if not deploymentCompleted:
                time.sleep(3)
        print(' Deployment completed')

        #Start monitoring
        print(' Monitoring in progress...')
        pods = v1.list_pod_for_all_namespaces(watch=False)
        containerName = ''.join(c for c in cls.config['monitoringContainer'] if c.isalnum())
        for pod in pods.items:
            if isMonitored(pod):
                filePath = join('/home/dump', dumpName(pod) + '.json')
                command = [
                    './bin/sh', '-c',
                    'tshark -i eth0 -a duration:' + str(info['time'] + 3) +
                    ' -N nNdt -T json > ' + filePath + ' 2>/dev/null &'
                ]
                try:
                    stream(v1.connect_get_namespaced_pod_exec,
                           pod.metadata.name,
                           pod.metadata.namespace,
                           command=command,
                           container=containerName,
                           stderr=False,
                           stdin=False,
                           stdout=True,
                           tty=False)
                except ApiException:
                    cls._cleanEnvironment()
                    raise MonitoringError(pod.metadata.name)

        #Start tests
        time.sleep(3)
        if info['test']:
            try:
                testModule = importlib.import_module(info['test'])
                testModule.runTest()
            except Exception:
                # Only Exception: an interrupt must not be reported as a
                # test failure (the finally clause still cleans up).
                cls._cleanEnvironment()
                raise TestError('')

        #Wait until monitoring is finished
        time.sleep(info['time'] + 5)
        print(' Monitoring completed')

        #Save on local host the packets
        pods = v1.list_pod_for_all_namespaces(watch=False)
        for pod in pods.items:
            if isMonitored(pod):
                fileName = dumpName(pod)
                remoteFilePath = join('home/dump', fileName + '.json')
                localFilePath = join(cls.config['monitoringFiles'], fileName + '.json')
                # NOTE(review): shell command built by concatenation; the
                # names come from the cluster and from the configuration.
                os.system('kubectl cp -c ' + containerName + ' ' +
                          pod.metadata.namespace + '/' + pod.metadata.name +
                          ':' + remoteFilePath + ' ' + localFilePath)

        #Create edges
        print(' Analyzing packets...')
        try:
            files = cls._listFiles(cls.config['monitoringFiles'])
        except WrongFolderError:
            cls._cleanEnvironment()
            raise
        for monitoringFilePath in files:
            if os.path.getsize(monitoringFilePath) == 0:
                continue
            srcNodeName = monitoringFilePath.split('/')[-1].replace('.json', '')
            with open(monitoringFilePath, 'rb') as monitoringFile:
                for packet in ijson.items(monitoringFile, 'item'):
                    if cls._isOutgoingPacket(packet, nodes, srcNodeName):
                        cls._createEdge(packet, nodes, srcNodeName)

        #Create communications
        commFactory = ConcreteCommunicationFactory()
        for monitoringFilePath in files:
            if os.path.getsize(monitoringFilePath) == 0:
                continue
            srcNodeName = monitoringFilePath.split('/')[-1].replace('.json', '')
            with open(monitoringFilePath, 'rb') as monitoringFile:
                for packet in ijson.items(monitoringFile, 'item'):
                    if cls._isOutgoingPacket(packet, nodes, srcNodeName):
                        cls._createCommunication(packet, nodes, commFactory, srcNodeName)

        # Drop the controllers added above (and their services) that ended up
        # without any outgoing edge.
        for newNode in newNodes:
            edges = nodes[newNode['controller']].getEdges(Direction.OUTGOING)
            if not edges:
                nodes.pop(newNode['controller'], None)
                for service in newNode['services']:
                    nodes.pop(service, None)
    finally:
        cls._cleanEnvironment()