def check_workers_running(app_config, namespace):
    """Checks if all execution profiler worker pods are up and running.

    Arguments:
        app_config {app_config_parser.AppConfig} -- app config object
        namespace {string} -- k8s namespace of execution profiler

    Returns:
        bool -- False if a worker pod was found in a phase other than
            "Running", True otherwise. A node for which no pod is listed
            yet does not make the result False.
    """
    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())
    core_v1_api = client.CoreV1Api()

    result = True
    for node in app_config.node_map():
        if node.startswith('home'):
            # ignore checking on home status
            continue

        # Equality-based selector "app=<app_name>-<node>". A selector without
        # "key=" only tests for the existence of a label *key* with that name
        # and would not match on a label value.
        # NOTE(review): assumes the worker deployments are labelled
        # app=<app_name>-<node> -- confirm against k8s_spec.deployment.
        label = "app=" + app_config.app_name + '-' + node
        resp = core_v1_api.list_namespaced_pod(namespace, label_selector=label)

        # Only the first pod returned for the selector is inspected.
        if resp.items:
            pod = resp.items[0]
            if pod.status.phase != "Running":
                log.debug(f"Execution Profiler pod not yet running on {node}")
                result = False

    if result is True:
        log.info("All execution profiler workers successfully running.")

    return result
def check_workers_running(app_config, namespace):
    """Report whether the pod of every DAG task is in the "Running" phase.

    Arguments:
        app_config {app_config_parser.AppConfig} -- app config object
        namespace {string} -- k8s namespace in which the pods are listed

    Returns:
        bool -- False if the first pod listed for any DAG task is in a phase
            other than "Running", True otherwise. Tasks for which no pod is
            listed are skipped.
    """
    # The kube config has to be loaded before any k8s client API call.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())
    pod_api = client.CoreV1Api()

    all_running = True
    for task in app_config.get_dag_task_names():
        selector = "app=" + app_config.app_name + '-' + task
        pods = pod_api.list_namespaced_pod(
            namespace, label_selector=selector).items

        if not pods:
            # Nothing listed for this task; it does not affect the result.
            continue

        # Only the first pod matching the selector is inspected.
        if pods[0].status.phase != "Running":
            log.debug(f"Wave mapper pod not yet running for {task}")
            all_running = False

    # NOTE(review): this message mentions the execution profiler although
    # the debug message above refers to the Wave mapper -- confirm wording.
    if all_running:
        log.info("All execution profiler workers successfully running.")

    return all_running
# TEST_INDICATORS dictates the names of the files to process. It is built
# from the ccdag settings:
#   * the first two binary digits are the test flags for coding,
#   * the letter is the test number (to tell it apart from the flags),
#   * the postfix (e.g. "sleep") indicates whether artificial sleeps are
#     injected in the test.
# Examples: "11a", "01a", "01b", "01a-sleep"
TEST_INDICATORS = "%d%d%s-%s" % (
    ccdag.CODING_PART1,
    ccdag.CODING_PART2,
    ccdag.EXP_ID,
    ccdag.EXP_NAME,
)
print(TEST_INDICATORS)

# see how file names are structured in main
APP_DIR = os.path.dirname(os.path.abspath(__file__))

# Parse app_config.yaml. Keep as a global to use in your app code.
app_config = app_config_parser.AppConfig(APP_DIR)

# The kube config must be loaded before the k8s client is created.
config.load_kube_config(config_file=jupiter_config.get_kubeconfig())
core_v1_api = client.CoreV1Api()

# Results are written below results/<TEST_INDICATORS>.
os.makedirs("results", exist_ok=True)
results_path = "results/" + TEST_INDICATORS
os.makedirs(results_path, exist_ok=True)

# Map the stringified index of every class to its entry in ccdag.classlist.
classid = [str(x) for x in np.arange(0, len(ccdag.classlist), 1)]
classmap = dict(zip(classid, ccdag.classlist))

# Module-level containers, all initially empty.
rt_enter_node = {}
rt_exit_node = {}
rt_enter_queue = {}
rt_exit_queue = {}
rt_datasource = {}
rt_home = {}
def launch_wave():
    """Deploy the WAVE mapper on k8s and save the mapping it computes.

    Steps, in order:
      1. Create the "<namespace_prefix>-mapper" namespace.
      2. Create a service for the home task and one per non-home node.
      3. Create a deployment per non-home node and wait until
         check_workers_running() stops returning False.
      4. Create the home deployment.
      5. Poll the home service through a kubectl proxy until it returns a
         mapping, then write it to "mapping.json" in the current working
         directory.

    The process exits with status 1 when reading the home service or
    creating/reading a worker service raises ApiException. The kubectl proxy
    process is killed before returning, also when polling is aborted by an
    exception such as KeyboardInterrupt.
    """
    # Parse app's app_config.yaml
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    namespace = app_config.namespace_prefix() + "-mapper"
    os.system(f"kubectl create namespace {namespace}")

    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())

    k8s_apps_v1 = client.AppsV1Api()
    core_v1_api = client.CoreV1Api()

    exec_prof_home_ip = lookup_home_ip("-exec", app_config, core_v1_api)
    drupe_home_ip = lookup_home_ip("-profiler", app_config, core_v1_api)

    # Create the k8s service for the home task. A service exposes ports of
    # pods to the entire k8s cluster; it does not launch pods.
    home_svc_name = app_config.app_name + "-home"
    home_svc_spec = k8s_spec.service.generate(
        name=home_svc_name,
        port_mappings=jupiter_config.k8s_service_port_mappings())
    resp = core_v1_api.create_namespaced_service(namespace, home_svc_spec)
    log.debug("Home service created. status = '%s'", resp.status)

    try:
        resp = core_v1_api.read_namespaced_service(home_svc_name, namespace)
    except ApiException:
        log.error("Unable to read namespaced service")
        sys.exit(1)

    home_node_ip = resp.spec.cluster_ip

    # Create one service per worker node and remember its cluster IP.
    all_workers_ips = []
    all_workers_names = []
    for node in app_config.node_map():
        if node.startswith('home'):
            # skip scheduling tasks on the home node
            continue

        pod_name = app_config.app_name + '-' + node
        spec = k8s_spec.service.generate(
            name=pod_name,
            port_mappings=jupiter_config.k8s_service_port_mappings())

        try:
            resp = core_v1_api.create_namespaced_service(namespace, spec)
            log.debug("Service created. status = '%s'", resp.status)
            resp = core_v1_api.read_namespaced_service(pod_name, namespace)
        except ApiException:
            log.error("Unable to create service for {}".format(pod_name))
            sys.exit(1)

        all_workers_ips.append(resp.spec.cluster_ip)
        all_workers_names.append(node)

    # Passed to the pods as ':'-separated environment variables.
    all_workers_ips = ':'.join(all_workers_ips)
    all_workers_names = ':'.join(all_workers_names)

    # The lookup does not depend on the node being deployed, so it is done
    # once and shared by every worker and by the home deployment.
    drupe_worker_ips = drupe_worker_names_to_ips(app_config, core_v1_api)

    for node, host in app_config.node_map().items():
        if node.startswith('home'):
            # do not deploy pods on home yet. will be done afterwards.
            continue

        pod_name = app_config.app_name + '-' + node
        spec = k8s_spec.deployment.generate(
            name=pod_name,
            label=pod_name,
            image=app_config.get_wave_worker_tag(),
            host=host,
            port_mappings=jupiter_config.k8s_deployment_port_mappings(),
            # inject any arbitrary environment variables here
            env_vars={
                "NODE_NAME": node,
                "HOME_NODE_IP": home_node_ip,
                "DRUPE_WORKER_IPS": drupe_worker_ips,
                "WORKER_NODE_NAMES": all_workers_names,
                "WORKER_NODE_IPS": all_workers_ips,
                "EXEC_PROF_HOME_IP": exec_prof_home_ip,
                "DRUPE_HOME_IP": drupe_home_ip,
            })
        # Call the Kubernetes API to create the deployment
        resp = k8s_apps_v1.create_namespaced_deployment(body=spec,
                                                        namespace=namespace)
        log.debug("Deployment created. status ='%s'", resp.status)

    # check if worker deployment pods are running
    while check_workers_running(app_config, namespace) is False:
        log.debug("WAVE worker pods still deploying, waiting...")
        time.sleep(30)

    home_depl_spec = k8s_spec.deployment.generate(
        name=app_config.app_name + "-home",
        label=app_config.app_name + "-home",
        image=app_config.get_wave_home_tag(),
        host=app_config.home_host(),
        port_mappings=jupiter_config.k8s_deployment_port_mappings(),
        env_vars={
            "NODE_NAME": "home",
            "WORKER_NODE_NAMES": all_workers_names,
            "WORKER_NODE_IPS": all_workers_ips,
            "DRUPE_WORKER_IPS": drupe_worker_ips,
            "EXEC_PROF_HOME_IP": exec_prof_home_ip,
            "FIRST_TASK": app_config.get_first_task(),
            "DRUPE_HOME_IP": drupe_home_ip
        })
    resp = k8s_apps_v1.create_namespaced_deployment(body=home_depl_spec,
                                                    namespace=namespace)
    log.debug("WAVE home deployment created. status = '%s'", resp.status)
    log.info('Successfully deployed WAVE')

    # Setup k8s proxy and retrieve mapping from WAVE home pod
    proxy_proc = setup_proxy(jupiter_config.kubectl_proxy_mapper())
    try:
        svc_port, _ = jupiter_config.flask_port_mapping()
        svc_path = (f"namespaces/{namespace}/services/"
                    f"{app_config.app_name}-home:{svc_port}/proxy")
        # NOTE(review): port 8081 is hard-coded; presumably it has to match
        # the port the proxy above listens on -- confirm.
        url = "http://localhost:8081/api/v1/" + svc_path
        log.info("Waiting for WAVE pod to boot...")
        log.info(svc_path)
        time.sleep(10)

        while True:
            try:
                log.debug('Trying to get the assignment from WAVE mapper')
                r = requests.get(url)
                mapping = json.dumps(r.json(), indent=4)
                log.info(f"mapping:\n{mapping}")
                # An empty JSON object/array serialises to 2 characters; a
                # reply whose text contains "status" is not the final mapping.
                if len(mapping) > 2 and "status" not in mapping:
                    break
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # be able to stop the polling.
                log.debug("WAVE not finished, retry in 30 sec...")
            # Wait between attempts, whether the request failed or the reply
            # was not yet the mapping, so the service is never hammered.
            time.sleep(30)

        with open("mapping.json", 'w') as f:
            f.write(mapping)
        log.info("Wrote mapping to file mapping.json. Ready to launch CIRCE.")
    finally:
        # TODO: print message talking about killing proxy
        proxy_proc.kill()
def launch_heft():
    """Deploy the HEFT mapper (a single home pod) and save its mapping.

    Creates the "<namespace_prefix>-mapper" namespace, the home service and
    the home deployment, then polls the home service through a kubectl proxy
    until it returns a mapping, which is written to "mapping.json" in the
    current working directory.

    The process exits with status 1 when reading the home service raises
    ApiException. The kubectl proxy process is killed before returning, also
    when polling is aborted by an exception such as KeyboardInterrupt.
    """
    # Parse app's app_config.yaml
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    namespace = app_config.namespace_prefix() + "-mapper"
    app_name = app_config.app_name
    os.system(f"kubectl create namespace {namespace}")

    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())

    k8s_apps_v1 = client.AppsV1Api()
    core_v1_api = client.CoreV1Api()

    # Looked up once, before anything is created in the mapper namespace.
    exec_prof_home_ip = lookup_home_ip("-exec", app_config, core_v1_api)

    log.info('Starting to deploy HEFT (a single home pod)')
    home_svc_name = app_name + "-home"
    spec = k8s_spec.service.generate(
        name=home_svc_name,
        port_mappings=jupiter_config.k8s_service_port_mappings())
    resp = core_v1_api.create_namespaced_service(namespace, spec)
    log.debug("Home service created. status = '%s'", resp.status)

    try:
        resp = core_v1_api.read_namespaced_service(home_svc_name, namespace)
    except ApiException:
        log.error("Unable to read namespaced service")
        sys.exit(1)

    home_node_ip = resp.spec.cluster_ip
    drupe_home_ip = lookup_home_ip("-profiler", app_config, core_v1_api)

    home_depl_spec = k8s_spec.deployment.generate(
        name=app_name + "-home",
        label=app_name + "-home",
        image=app_config.get_mapper_tag(),
        host=app_config.home_host(),
        port_mappings=jupiter_config.k8s_deployment_port_mappings(),
        env_vars={
            "NODE_NAME": "home",
            "HOME_NODE_IP": home_node_ip,
            "DRUPE_WORKER_IPS": drupe_worker_names_to_ips(app_config,
                                                          core_v1_api),
            "WORKER_NODE_NAMES": concat_worker_names(app_config),
            "EXEC_PROF_HOME_IP": exec_prof_home_ip,
            "DRUPE_HOME_IP": drupe_home_ip,
            "TASK_MAPPER": app_config.task_mapper(),
        })
    resp = k8s_apps_v1.create_namespaced_deployment(body=home_depl_spec,
                                                    namespace=namespace)
    log.debug("HEFT home deployment created. status = '%s'", resp.status)
    log.info('Successfully deployed HEFT')

    # Setup k8s proxy and retrieve mapping from HEFT pod
    proxy_proc = setup_proxy(jupiter_config.kubectl_proxy_mapper())
    try:
        svc_port, _ = jupiter_config.flask_port_mapping()
        # NOTE(review): port 8081 is hard-coded; presumably it has to match
        # the port the proxy above listens on -- confirm.
        url = "http://localhost:8081/api/v1/" \
            + f"namespaces/{namespace}/services/{app_name}-home:{svc_port}/proxy"
        log.info("Waiting for HEFT pod to boot...")
        time.sleep(10)

        while True:
            try:
                log.debug('Trying to get the assignment from HEFT mapper')
                r = requests.get(url)
                mapping = json.dumps(r.json(), indent=4)
                log.info(f"mapping:\n{mapping}")
                # json.dumps() never yields an empty string; an empty JSON
                # object/array is 2 characters long, so only longer text
                # carries content. A reply whose text contains "status" is
                # not the final mapping.
                if len(mapping) > 2 and "status" not in mapping:
                    break
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # be able to stop the polling.
                log.debug("HEFT not finished, retry in 30 sec...")
            # Wait between attempts, whether the request failed or the reply
            # was not yet the mapping, so the service is never hammered.
            time.sleep(30)

        with open("mapping.json", 'w') as f:
            f.write(mapping)
        log.info("Wrote mapping to file mapping.json. Ready to launch CIRCE.")
    finally:
        # TODO: print message talking about killing proxy
        proxy_proc.kill()
def launch_circe(task_mapping):
    """Deploy CIRCE on k8s according to a task-to-node mapping.

    Creates the "<namespace_prefix>-circe" namespace, the services for the
    home, DAG and non-DAG tasks, then the DAG task deployments, the non-DAG
    task deployments and finally the home deployment. After each of the first
    two deployment rounds it waits until the matching check function stops
    returning False.

    Arguments:
        task_mapping {mapping} -- DAG task name -> node name (e.g. derived
            from "mapping.json"); node names are translated to k8s hostnames
            through app_config.node_map()

    The process exits with status 1 when reading the home service raises
    ApiException, or when a DAG task is missing from task_mapping or maps to
    a node that is not in app_config.node_map().
    """
    # Parse app's app_config.yaml
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    namespace = app_config.namespace_prefix() + "-circe"
    os.system(f"kubectl create namespace {namespace}")

    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())
    api = client.CoreV1Api()
    k8s_apps_v1 = client.AppsV1Api()

    # Compile port mappings for k8s services for Jupiter and the application.
    # Application mappings are "service_port:container_port" strings; any
    # failure while reading them is treated as "no application mappings".
    svc_port_mappings = jupiter_config.k8s_service_port_mappings()
    try:
        for idx, mapping in enumerate(app_config.port_mappings()):
            svc, docker = mapping.split(':')
            svc_port_mappings.append({
                "name": f"custom{idx}",
                "port": int(svc),
                "targetPort": int(docker)
            })
    except Exception:
        log.debug('No application port mappings')

    # Compile port mappings for k8s deployments for Jupiter and the
    # application
    depl_port_mappings = jupiter_config.k8s_deployment_port_mappings()
    try:
        for idx, mapping in enumerate(app_config.port_mappings()):
            svc, docker = mapping.split(':')
            depl_port_mappings.append({
                "name": f"custom{idx}",
                "containerPort": int(docker)
            })
    except Exception:
        log.debug('No application port mappings')

    # *** Create Home Task Service ***
    home_svc_name = app_config.app_name + "-home"
    home_svc_spec = k8s_spec.service.generate(name=home_svc_name,
                                              port_mappings=svc_port_mappings)
    resp = api.create_namespaced_service(namespace, home_svc_spec)
    log.debug("Home service created. status = '%s'", resp.status)

    try:
        resp = api.read_namespaced_service(home_svc_name, namespace)
    except ApiException:
        log.error("Unable to read namespaced service")
        sys.exit(1)

    home_task_ip = resp.spec.cluster_ip

    # *** Create DAG Task Services ***
    task_to_ip_string = create_services(app_config.app_name, namespace,
                                        app_config.get_dag_tasks(), api,
                                        svc_port_mappings)

    # *** Create Non-DAG Task Services ***
    nondag_task_to_ip_string = create_services(app_config.app_name, namespace,
                                               app_config.get_nondag_tasks(),
                                               api, svc_port_mappings)

    # *** Create DAG Task Deployments ***
    # Each DAG task to be launched on nodes designated by task_mapping
    # (e.g., derived from "mapping.json" file). Node names in task_mapping will
    # be mapped to the k8s hostname as indicated in app_config.yaml.
    node_map = app_config.node_map()
    for task in app_config.get_dag_tasks():
        try:
            node = task_mapping[task['name']]
            k8s_hostname = node_map[node]
        except KeyError:
            log.critical("Task missing in mapping file or node not in "
                         "app_config.yaml. Clean up with delete_all_circe.py.")
            # Non-zero status so callers and shells see the failure; the bare
            # exit() builtin would report success and needs the site module.
            sys.exit(1)

        pod_name = app_config.app_name + '-' + task['name']
        spec = k8s_spec.deployment.generate(
            name=pod_name,
            label=pod_name,
            image=app_config.get_circe_tag(),
            host=k8s_hostname,
            port_mappings=depl_port_mappings,
            # inject any arbitrary environment variables here
            env_vars={
                "MY_TASK_NAME": task['name'],
                "CIRCE_HOME_IP": home_task_ip,
                "CIRCE_TASK_TO_IP": task_to_ip_string,
                "CIRCE_NONDAG_TASK_TO_IP": nondag_task_to_ip_string,
            })
        resp = k8s_apps_v1.create_namespaced_deployment(body=spec,
                                                        namespace=namespace)
        log.debug(f"DAG task deployment created. status={resp.status}")

    while check_dag_workers_running(app_config, namespace) is False:
        log.debug("CIRCE dag worker pods still deploying, waiting...")
        time.sleep(30)

    # *** Create Non-DAG Task Deployments ***
    for nondag_task in app_config.get_nondag_tasks():
        pod_name = app_config.app_name + '-' + nondag_task['name']
        spec = k8s_spec.deployment.generate(
            name=pod_name,
            label=pod_name,
            image=app_config.get_circe_tag(),
            host=nondag_task['k8s_host'],
            port_mappings=depl_port_mappings,
            # inject any arbitrary environment variables here
            env_vars={
                "MY_TASK_NAME": nondag_task['name'],
                "CIRCE_HOME_IP": home_task_ip,
                "CIRCE_TASK_TO_IP": task_to_ip_string,
                "CIRCE_NONDAG_TASK_TO_IP": nondag_task_to_ip_string,
            })
        resp = k8s_apps_v1.create_namespaced_deployment(body=spec,
                                                        namespace=namespace)
        log.debug(f"Non-DAG task depl. created. status={resp.status}")

    while check_nondag_workers_running(app_config, namespace) is False:
        log.debug("CIRCE nondag worker pods still deploying, waiting...")
        time.sleep(30)

    # *** Create Home Task Deployment ***
    home_depl_spec = k8s_spec.deployment.generate(
        name=app_config.app_name + "-home",
        label=app_config.app_name + "-home",
        image=app_config.get_circe_tag(),
        host=app_config.home_host(),
        port_mappings=depl_port_mappings,
        env_vars={
            "MY_TASK_NAME": "home",
            "CIRCE_HOME_IP": home_task_ip,
            "CIRCE_TASK_TO_IP": task_to_ip_string,
            "CIRCE_NONDAG_TASK_TO_IP": nondag_task_to_ip_string,
        })
    resp = k8s_apps_v1.create_namespaced_deployment(body=home_depl_spec,
                                                    namespace=namespace)
    log.debug(f"Home deployment created. status={resp.status}")

    log.info('CIRCE successfully deployed')
def main():
    """Deploy DRUPE in the system.

    Creates the "<namespace_prefix>-profiler" namespace, a service for the
    home profiler and one per non-home node, the worker deployments and --
    once check_workers_running() no longer returns False -- the home
    deployment. Exits with status 1 when reading the home service or
    creating/reading a worker service raises ApiException.

    Returns:
        dict -- node name ("home" included) -> cluster IP of its service
    """
    # Parse app's app_config.yaml
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    namespace = app_config.namespace_prefix() + "-profiler"
    os.system(f"kubectl create namespace {namespace}")

    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())
    api = client.CoreV1Api()
    k8s_apps_v1 = client.AppsV1Api()

    # Cluster IP of every profiler service, keyed by node name.
    all_profiler_map = {}

    # ---- home service ----
    home_name = app_config.app_name + "-home"
    home_svc_spec = k8s_spec.service.generate(
        name=home_name,
        port_mappings=jupiter_config.k8s_service_port_mappings())
    resp = api.create_namespaced_service(namespace, home_svc_spec)
    log.debug(f"Home service created. status = '{resp.status!s}'")

    try:
        resp = api.read_namespaced_service(home_name, namespace)
    except ApiException:
        log.error("Unable to read namespaced service")
        sys.exit(1)

    home_node_ip = resp.spec.cluster_ip
    all_profiler_map['home'] = resp.spec.cluster_ip
    logging.debug('Home Profilers were created successfully!')

    # ---- worker services: one per node, the home node excluded ----
    worker_ips = []
    worker_names = []
    for node in app_config.node_map():
        if node.startswith('home'):
            continue

        pod_name = app_config.app_name + '-' + node
        spec = k8s_spec.service.generate(
            name=pod_name,
            port_mappings=jupiter_config.k8s_service_port_mappings())
        try:
            resp = api.create_namespaced_service(namespace, spec)
            log.debug(f"Service created. status = '{resp.status!s}'")
            resp = api.read_namespaced_service(pod_name, namespace)
        except ApiException:
            log.error(f"Unable to create service for {pod_name}")
            sys.exit(1)

        worker_ips.append(resp.spec.cluster_ip)
        worker_names.append(node)
        all_profiler_map[node] = resp.spec.cluster_ip

    # Handed to the pods as ':'-separated environment variables.
    all_profiler_ips = ':'.join(worker_ips)
    all_profiler_names = ':'.join(worker_names)
    logging.debug('Worker Profilers were created successfully!')

    # ---- worker deployments ----
    for node, host in app_config.node_map().items():
        if node.startswith('home'):
            # The home pod is deployed last, after the workers are running.
            continue

        pod_name = app_config.app_name + '-' + node
        spec = k8s_spec.deployment.generate(
            name=pod_name,
            label=pod_name,
            image=app_config.get_drupe_worker_tag(),
            host=host,
            port_mappings=jupiter_config.k8s_deployment_port_mappings(),
            # any extra environment variables can be injected here
            env_vars={
                "NODE_NAME": node,
                "HOME_NODE_IP": home_node_ip,
                "ALL_NODE_IPS": all_profiler_ips,
                "ALL_NODE_NAMES": all_profiler_names,
                "NODE_IP": all_profiler_map[node]
            })
        # Ask the Kubernetes API to create the deployment.
        resp = k8s_apps_v1.create_namespaced_deployment(body=spec,
                                                        namespace=namespace)
        log.debug(f"Deployment created. status ='{resp.status!s}'")

    # Block until the worker deployment pods are reported as running.
    while check_workers_running(app_config, namespace) is False:
        log.debug("DRUPE profiler worker pods still deploying, waiting...")
        time.sleep(30)

    # ---- home deployment ----
    home_depl_spec = k8s_spec.deployment.generate(
        name=app_config.app_name + "-home",
        label=app_config.app_name + "-home",
        image=app_config.get_drupe_home_tag(),
        host=app_config.home_host(),
        port_mappings=jupiter_config.k8s_deployment_port_mappings(),
        env_vars={
            "NODE_NAME": "home",
            "HOME_NODE_IP": home_node_ip,
            "ALL_NODE_IPS": all_profiler_ips,
            "ALL_NODE_NAMES": all_profiler_names,
            "NODE_IP": all_profiler_map["home"]
        })
    resp = k8s_apps_v1.create_namespaced_deployment(body=home_depl_spec,
                                                    namespace=namespace)
    log.debug(f"Home deployment created. status = '{resp.status!s}'")

    pprint(all_profiler_map)
    logging.debug('Successfully deploy DRUPE ')
    return all_profiler_map
def main():
    """Deploy the execution profiler on k8s.

    Creates the "<namespace_prefix>-exec" namespace, a service for the home
    task and one per non-home node, the worker deployments and -- once
    check_workers_running() no longer returns False -- the home deployment.
    Exits with status 1 when reading the home service or creating/reading a
    worker service raises ApiException.
    """
    # Parse app's app_config.yaml
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    namespace = app_config.namespace_prefix() + "-exec"
    os.system(f"kubectl create namespace {namespace}")

    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())
    api = client.CoreV1Api()
    k8s_apps_v1 = client.AppsV1Api()

    # ---- home service ----
    # The home task signals profiling for all the execution profiler workers
    # and collects results. A k8s service exposes ports of pods to the
    # entire k8s cluster; it does not launch pods.
    home_name = app_config.app_name + "-home"
    home_svc_spec = k8s_spec.service.generate(
        name=home_name,
        port_mappings=jupiter_config.k8s_service_port_mappings())
    resp = api.create_namespaced_service(namespace, home_svc_spec)
    log.debug(f"Home service created. status = '{resp.status!s}'")

    try:
        resp = api.read_namespaced_service(home_name, namespace)
    except ApiException:
        log.error("Unable to read namespaced service")
        sys.exit(1)

    home_node_ip = resp.spec.cluster_ip

    # ---- worker services ----
    # One service per non-home node of node_map(); again this only exposes
    # ports and does not launch pods. The collected IPs and names are
    # injected into the pods' environment variables further down.
    worker_ips = []
    worker_names = []
    for node in app_config.node_map():
        if node.startswith('home'):
            # nothing is scheduled on the home node here
            continue

        pod_name = app_config.app_name + '-' + node
        spec = k8s_spec.service.generate(
            name=pod_name,
            port_mappings=jupiter_config.k8s_service_port_mappings())
        try:
            resp = api.create_namespaced_service(namespace, spec)
            log.debug(f"Service created. status = '{resp.status!s}'")
            resp = api.read_namespaced_service(pod_name, namespace)
        except ApiException:
            log.error(f"Unable to create service for {pod_name}")
            sys.exit(1)

        worker_ips.append(resp.spec.cluster_ip)
        worker_names.append(node)

    all_profiler_ips = ':'.join(worker_ips)
    all_profiler_names = ':'.join(worker_names)

    # ---- worker deployments ----
    for node, host in app_config.node_map().items():
        if node.startswith('home'):
            # The home pod is deployed last, after the workers are running.
            continue

        pod_name = app_config.app_name + '-' + node
        spec = k8s_spec.deployment.generate(
            name=pod_name,
            label=pod_name,
            image=app_config.get_exec_worker_tag(),
            host=host,
            port_mappings=jupiter_config.k8s_deployment_port_mappings(),
            # any extra environment variables can be injected here
            env_vars={
                "NODE_NAME": node,
                "HOME_NODE_IP": home_node_ip,
                "ALL_PROFILER_IPS": all_profiler_ips,
                "ALL_PROFILER_NAMES": all_profiler_names
            })
        # Ask the Kubernetes API to create the deployment.
        resp = k8s_apps_v1.create_namespaced_deployment(body=spec,
                                                        namespace=namespace)
        log.debug(f"Deployment created. status ='{resp.status!s}'")

    # Block until the worker deployment pods are reported as running.
    while check_workers_running(app_config, namespace) is False:
        log.debug("Execution profiler worker pods still deploying, waiting...")
        time.sleep(30)

    # ---- home deployment ----
    home_depl_spec = k8s_spec.deployment.generate(
        name=app_config.app_name + "-home",
        label=app_config.app_name + "-home",
        image=app_config.get_exec_home_tag(),
        host=app_config.home_host(),
        port_mappings=jupiter_config.k8s_deployment_port_mappings(),
        env_vars={
            "NODE_NAME": "home",
            "HOME_NODE_IP": home_node_ip,
            "ALL_PROFILER_IPS": all_profiler_ips,
            "ALL_PROFILER_NAMES": all_profiler_names
        })
    resp = k8s_apps_v1.create_namespaced_deployment(body=home_depl_spec,
                                                    namespace=namespace)
    log.debug(f"Home deployment created. status = '{resp.status!s}'")

    log.info('Successfully deployed execution profiler.')