def list_running_jobs(team_only: bool = False):
    """
    List the ``orbit-runner`` Kubernetes jobs found in the team namespace.

    Parameters
    ----------
    team_only: bool
        When False (default) the selector is ``username=<current user>``.
        When True the selector is inverted to ``username!=<current user>``.

    Returns
    -------
    list
        The ``items`` entry of the decoded job-list response, or an empty
        list when the response has no ``items`` key.

    Raises
    ------
    ApiException
        Re-raised after being logged when the Kubernetes call fails.
    """
    props = get_properties()
    team_name = props["AWS_ORBIT_TEAM_SPACE"]
    load_kube_config()
    username = os.environ.get("JUPYTERHUB_USER", os.environ.get("USERNAME"))
    if username:
        # The job-creation code in this module labels jobs with the part of
        # the user name that precedes "@"; "@" is also not a legal character
        # in a label value, so the selector must use the same short form.
        username = username.split("@")[0]
    api_instance = BatchV1Api()
    operand = "!=" if team_only else "="
    label_selector = f"app=orbit-runner,username{operand}{username}"
    _logger.info("using job selector %s", label_selector)
    try:
        api_response = api_instance.list_namespaced_job(
            namespace=team_name,
            _preload_content=False,
            label_selector=label_selector,
            watch=False,
        )
        res = json.loads(api_response.data)
    except ApiException as e:
        _logger.info("Exception when calling BatchV1Api->list_namespaced_job: %s\n", e)
        raise
    return res.get("items", [])
def delete_task_schedule_eks(triggerName: str) -> None:
    """
    Delete the CronJob named ``orbit-<team space>-<triggerName>``.

    Parameters
    ----------
    triggerName: str
        Trigger name used as the last component of the CronJob name.

    Returns
    -------
    None
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    load_kube_config()
    batch_api = BatchV1beta1Api()
    cron_job_name = f"orbit-{team_space}-{triggerName}"
    # The team space doubles as the Kubernetes namespace of the CronJob.
    batch_api.delete_namespaced_cron_job(name=cron_job_name, namespace=team_space)
def delete_storage_pvc(pvc_name: str):
    """
    Delete a persistent volume claim and report the outcome as a dict.

    The namespace is ``AWS_ORBIT_USER_SPACE`` from the environment, falling
    back to the team space from the properties.

    Parameters
    ----------
    pvc_name: str
        Name of the persistent volume claim to delete.

    Returns
    -------
    dict
        ``status``, ``reason`` and ``message`` keys; on an ``ApiException``
        they are taken from the JSON body of the exception instead of raising.
    """
    load_kube_config()
    core_api = CoreV1Api()
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    request_args: Dict[str, Any] = {
        "name": pvc_name,
        "namespace": os.environ.get("AWS_ORBIT_USER_SPACE", team_space),
        "_preload_content": False,
    }
    try:
        result = core_api.delete_namespaced_persistent_volume_claim(**request_args)
    except ApiException as err:
        _logger.info("Exception when calling CoreV1Api->delete persistent volume claim: %s\n" % err)
        failure = json.loads(err.body)
        return {
            "status": str(failure["code"]),
            "reason": failure["reason"],
            "message": failure["message"],
        }
    return {
        "status": str(result.status),
        "reason": result.reason,
        "message": f"Successfully deleted persistent volume claim={pvc_name}",
    }
def _get_cred_to_redshift_cluster(self, cluster_name: str) -> Dict[str, str]:
    """
    Invoke the ``<env>-<team space>-ConnectToRedshiftFunction`` Lambda and
    return the credentials found in its response payload.

    Parameters
    ----------
    cluster_name : str
        Cluster name sent to the Lambda as ``cluster_name``.

    Returns
    -------
    Dict[str, str]
        ``user`` and ``password`` taken from the Lambda response payload.

    Raises
    ------
    Exception
        When the payload's ``statusCode`` is missing or is not ``"200"``.
    """
    props = get_properties()
    function_name = "{}-{}-{}".format(
        props["AWS_ORBIT_ENV"], props["AWS_ORBIT_TEAM_SPACE"], "ConnectToRedshiftFunction"
    )
    lambda_client = boto3.client("lambda")
    invoke_response = lambda_client.invoke(
        FunctionName=function_name,
        Payload=bytes(json.dumps({"cluster_name": cluster_name}), "utf-8"),
        InvocationType="RequestResponse",
        LogType="Tail",
    )
    response_payload = json.loads(invoke_response["Payload"].read().decode("utf-8"))
    # Use .get(): a payload without "statusCode" must end up in the logged
    # error path below rather than surfacing as a bare KeyError.
    if response_payload.get("statusCode") != "200":
        logger.error(response_payload)
        raise Exception("could not connect to cluster")
    return {
        "user": response_payload["user"],
        "password": response_payload["password"],
    }
def schedule_task_eks(triggerName: str, frequency: str, taskConfiguration: dict) -> Any:
    """
    Create a Kubernetes CronJob that runs the given task on a schedule.

    Parameters
    ----------
    triggerName: str
        A unique name of the time trigger; the CronJob is named
        ``orbit-<team space>-<triggerName>``.
    frequency: str
        Schedule string, passed unchanged as the CronJob ``schedule``.
    taskConfiguration: dict
        Task definition handed to ``_create_eks_job_spec``.

    Returns
    -------
    dict
        ``ExecutionType`` (always ``"eks"``) and ``Identifier`` (the name of
        the created CronJob).

    Example
    -------
    Example for schedule_task similar to run_python and run_notebook tasks (refer to 'run_python').
    """
    props = get_properties()
    team_name = props["AWS_ORBIT_TEAM_SPACE"]
    node_type = get_node_type(taskConfiguration)
    # Only the part of the user name before "@" is used for the label.
    username = (os.environ.get("JUPYTERHUB_USER", os.environ.get("USERNAME"))).split("@")[0]
    cronjob_id = f"orbit-{team_name}-{triggerName}"
    labels = {
        "app": "orbit-runner",
        "orbit/node-type": node_type,
        "username": username,
        "cronjob_id": cronjob_id,
    }
    team_constants: TeamConstants = TeamConstants(username)
    job_spec = _create_eks_job_spec(taskConfiguration, labels=labels, team_constants=team_constants)
    cron_job_template: V1beta1JobTemplateSpec = V1beta1JobTemplateSpec(spec=job_spec)
    cron_job_spec: V1beta1CronJobSpec = V1beta1CronJobSpec(job_template=cron_job_template, schedule=frequency)
    job = V1beta1CronJob(
        api_version="batch/v1beta1",
        kind="CronJob",
        metadata=V1ObjectMeta(name=cronjob_id, labels=labels, namespace=team_name),
        status=V1beta1CronJobStatus(),
        spec=cron_job_spec,
    )
    load_kube_config()
    job_instance: V1beta1CronJob = BatchV1beta1Api().create_namespaced_cron_job(namespace=team_name, body=job)
    metadata: V1ObjectMeta = job_instance.metadata
    # Log before returning; this statement used to sit after the return
    # and therefore never executed.
    _logger.debug("started job %s", metadata.name)
    return {
        "ExecutionType": "eks",
        "Identifier": metadata.name,
    }
def delete_task_schedule_eks(triggerName: str) -> None:
    """
    Delete the scheduled OrbitJob that belongs to a trigger.

    Parameters
    ----------
    triggerName: str
        Trigger name used as the last component of the OrbitJob name.

    Returns
    -------
    None
    """
    props = get_properties()
    team_name = props["AWS_ORBIT_TEAM_SPACE"]
    namespace = os.environ.get("AWS_ORBIT_USER_SPACE", team_name)
    dynamic_client = _dynamic_client()
    api = dynamic_client.resources.get(api_version=ORBIT_API_VERSION, group=ORBIT_API_GROUP, kind="OrbitJob")
    # The OrbitJob-based schedule_task_eks in this module names the object
    # "orbit-<namespace>-<triggerName>", so the same prefix is used here;
    # with the team name the lookup misses whenever AWS_ORBIT_USER_SPACE is
    # set to a different value.
    # NOTE(review): confirm no other producer names these jobs by team name.
    api.delete(name=f"orbit-{namespace}-{triggerName}", namespace=namespace)
def _run_task_eks(taskConfiguration: dict) -> Any:
    """
    Submit the task as a Kubernetes Job in the team namespace.

    Parameters
    ----------
    taskConfiguration: dict
        A task definition to execute. Labels under
        ``taskConfiguration["compute"]["labels"]``, when present, are merged
        into the Job metadata labels.

    Returns
    -------
    dict
        ``ExecutionType``, ``Identifier`` (created job name), ``NodeType``
        and the ``tasks`` entry of the task configuration.
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    login = os.environ.get("JUPYTERHUB_USER", os.environ.get("USERNAME"))
    short_user = login.split("@")[0]
    node_type = get_node_type(taskConfiguration)

    base_labels = {"app": "orbit-runner", "orbit/node-type": node_type, "username": short_user}
    if node_type == "ec2":
        base_labels["orbit/attach-security-group"] = "yes"

    job_spec = _create_eks_job_spec(taskConfiguration, labels=base_labels, team_constants=TeamConstants())
    load_kube_config()

    # Compute labels are layered on top for the Job metadata only; the spec
    # above was already built from the base labels.
    if "compute" in taskConfiguration and "labels" in taskConfiguration["compute"]:
        job_labels = {**base_labels, **taskConfiguration["compute"]["labels"]}
    else:
        job_labels = base_labels

    job_metadata = V1ObjectMeta(
        generate_name=f"orbit-{team_space}-{node_type}-runner-",
        labels=job_labels,
        namespace=team_space,
    )
    job_body = V1Job(api_version="batch/v1", kind="Job", metadata=job_metadata, spec=job_spec)
    created = BatchV1Api().create_namespaced_job(namespace=team_space, body=job_body)
    created_meta = created.metadata
    _logger.debug(f"started job {created_meta.name}")
    return {
        "ExecutionType": "eks",
        "Identifier": created_meta.name,
        "NodeType": node_type,
        "tasks": taskConfiguration["tasks"],
    }
def _get_invoke_function_name() -> Any:
    """
    Build the container-runner function name from the properties.

    Returns
    -------
    str
        ``orbit-<AWS_ORBIT_ENV>-<AWS_ORBIT_TEAM_SPACE>-container-runner``.
    """
    props = get_properties()
    env_name = props["AWS_ORBIT_ENV"]
    team_space = props["AWS_ORBIT_TEAM_SPACE"]
    return f"orbit-{env_name}-{team_space}-container-runner"
def get_team_clusters(
    self, cluster_id: Optional[str] = None
) -> Dict[str, Dict[str, Union[str, Dict[str, Union[str, int]]]]]:
    """
    Describe the team's Redshift clusters.

    Parameters
    ----------
    cluster_id : str, optional
        When given, only this cluster identifier is described. Otherwise
        clusters are looked up by the tag value
        ``<AWS_ORBIT_ENV>-<AWS_ORBIT_TEAM_SPACE>``.

    Returns
    -------
    clusters_info : Dict[str, Dict[str, Union[str, Dict[str, Union[str, int]]]]]
        One entry per cluster, keyed by cluster identifier, holding
        ``cluster_id``, ``Name``, ``State``, optionally ``ip``
        (``address:port``), ``instances`` (node type and node count) and
        ``info`` (the raw cluster description).

    Example
    --------
    >>> from aws.utils.notebooks.database import RedshiftUtils
    >>> from aws.utils.notebooks.common import get_workspace
    >>> RedshiftUtils.get_team_clusters(cluster_id= "my_cluster")
    """
    redshift = boto3.client("redshift")
    props = get_properties()
    if cluster_id is not None:
        described = redshift.describe_clusters(ClusterIdentifier=cluster_id)
    else:
        search_tag = props["AWS_ORBIT_ENV"] + "-" + props["AWS_ORBIT_TEAM_SPACE"]
        described = redshift.describe_clusters(TagValues=[search_tag])

    clusters_info: Dict[str, Any] = {}
    for cluster in described["Clusters"]:
        identifier = cluster.get("ClusterIdentifier", "")
        model: Dict[str, Any] = {
            "cluster_id": identifier,
            "Name": identifier,
            "State": cluster["ClusterStatus"],
        }
        endpoint = cluster["Endpoint"] if "Endpoint" in cluster else None
        # The address is only reported when the endpoint carries one.
        if endpoint is not None and "Address" in endpoint:
            model["ip"] = f"{cluster['Endpoint']['Address']}:{cluster['Endpoint']['Port']}"
        model["instances"] = {
            "node_type": cluster["NodeType"],
            "nodes": len(cluster["ClusterNodes"]),
        }
        clusters_info[identifier] = model
        model["info"] = cluster
    return clusters_info
def delete_all_my_jobs():
    """
    Delete every ``orbit-runner`` job in the team namespace that is labelled
    with the current user.

    Raises
    ------
    ApiException
        Re-raised after being logged when the Kubernetes call fails.
    """
    props = get_properties()
    team_name = props["AWS_ORBIT_TEAM_SPACE"]
    username = os.environ.get("JUPYTERHUB_USER", os.environ.get("USERNAME"))
    if username:
        # The job-creation code in this module labels jobs with the part of
        # the user name that precedes "@"; "@" is also not a legal character
        # in a label value, so the selector must use the same short form.
        username = username.split("@")[0]
    load_kube_config()
    api_instance = BatchV1Api()
    label_selector = f"app=orbit-runner,username={username}"
    try:
        api_instance.delete_collection_namespaced_job(
            namespace=team_name,
            _preload_content=False,
            orphan_dependents=False,
            label_selector=label_selector,
        )
    except ApiException as e:
        _logger.info("Exception when calling BatchV1Api->delete_collection_namespaced_job: %s\n", e)
        raise
def delete_cronjob(job_name: str, grace_period_seconds: int = 30):
    """
    Delete a CronJob from the team namespace.

    Parameters
    ----------
    job_name: str
        Name of the CronJob to delete.
    grace_period_seconds: int
        Grace period forwarded to the delete call (default 30).

    Raises
    ------
    ApiException
        Re-raised after being logged when the Kubernetes call fails.
    """
    props = get_properties()
    team_name = props["AWS_ORBIT_TEAM_SPACE"]
    load_kube_config()
    api_instance = BatchV1beta1Api()
    try:
        api_instance.delete_namespaced_cron_job(
            name=job_name,
            namespace=team_name,
            _preload_content=False,
            grace_period_seconds=grace_period_seconds,
            orphan_dependents=False,
        )
    except ApiException as e:
        # The message names the API class that is actually used above.
        _logger.info("Exception when calling BatchV1beta1Api->delete_namespaced_cron_job: %s\n", e)
        raise
def list_running_cronjobs():
    """
    List the CronJobs of the team namespace.

    Returns
    -------
    list
        The ``items`` entry of the decoded response, or an empty list when
        the response has no ``items`` key.

    Raises
    ------
    ApiException
        Re-raised after being logged when the Kubernetes call fails.
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    load_kube_config()
    batch_api = BatchV1beta1Api()
    try:
        raw = batch_api.list_namespaced_cron_job(namespace=team_space, _preload_content=False)
        payload = json.loads(raw.data)
    except ApiException as err:
        _logger.info("Exception when calling BatchV1beta1Api->list_namespaced_cron_job: %s\n" % err)
        raise err
    return payload["items"] if "items" in payload else []
def create_cluster(self, cluster_name: str, number_of_nodes: str, node_type: str) -> Dict[str, str]:
    """
    Create a Redshift cluster by invoking the
    ``orbit-<env>-<team space>-StartRedshift-Standard`` Lambda.

    Parameters
    ----------
    cluster_name : str
        Short cluster name; the identifier sent to the Lambda is the
        lower-cased ``orbit-<env>-<team space>-<cluster_name>``.
    number_of_nodes : str
        Sent to the Lambda as ``Nodes``.
    node_type : str
        Sent to the Lambda as ``NodeType``.

    Returns
    -------
    Dict[str, str]
        ``status`` and ``message``. ``status`` is ``"400"`` when the payload
        has no ``statusCode`` or it is not ``"200"``.
    """
    props = get_properties()
    env = props["AWS_ORBIT_ENV"]
    team_space = props["AWS_ORBIT_TEAM_SPACE"]
    cluster_def_func = f"orbit-{env}-{team_space}-StartRedshift-Standard"
    cluster_identifier = f"orbit-{env}-{team_space}-{cluster_name}".lower()
    cluster_args = {
        "cluster_name": cluster_identifier,
        "Nodes": number_of_nodes,
        "NodeType": node_type,
    }
    lambda_client = boto3.client("lambda")
    invoke_response = lambda_client.invoke(
        FunctionName=cluster_def_func,
        Payload=bytes(json.dumps(cluster_args), "utf-8"),
        InvocationType="RequestResponse",
        LogType="Tail",
    )
    response_payload = json.loads(invoke_response["Payload"].read().decode("utf-8"))
    if response_payload.get("statusCode") != "200":
        logger.error(response_payload)
        # A non-200 payload is not guaranteed to carry "errorMessage";
        # fall back to a placeholder instead of raising KeyError here.
        error_message = response_payload.get("errorMessage", "unknown error")
        return {
            "status": "400",
            "message": f"Error creating Redshift cluster - {error_message}",
        }
    cluster_id = response_payload["cluster_id"]
    logger.info("cluster created: %s", cluster_id)
    return {
        "status": str(response_payload["statusCode"]),
        "message": f"Successfully created Redshift cluster {cluster_id}",
    }
def delete_cronjob(job_name: str, grace_period_seconds: int = 30):
    """
    Delete an OrbitJob by name.

    The namespace is ``AWS_ORBIT_USER_SPACE`` from the environment, falling
    back to the team space from the properties.

    Parameters
    ----------
    job_name: str
        Name of the OrbitJob to delete.
    grace_period_seconds: int
        Grace period forwarded to the delete call (default 30).

    Raises
    ------
    ApiException
        Re-raised after being logged when the delete call fails.
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    orbit_jobs = _dynamic_client().resources.get(
        api_version=ORBIT_API_VERSION, group=ORBIT_API_GROUP, kind="OrbitJob"
    )
    target_namespace = os.environ.get("AWS_ORBIT_USER_SPACE", team_space)
    try:
        orbit_jobs.delete(
            name=job_name,
            namespace=target_namespace,
            grace_period_seconds=grace_period_seconds,
            orphan_dependents=False,
        )
    except ApiException as err:
        _logger.info("Exception when calling DynamicClient.delete() for OrbitJobs: %s\n" % err)
        raise err
def delete_all_my_jobs():
    """
    Delete the OrbitJobs labelled ``k8sJobType=Job`` in the user namespace.

    The namespace is ``AWS_ORBIT_USER_SPACE`` from the environment, falling
    back to the team space from the properties.

    Raises
    ------
    ApiException
        Re-raised after being logged when the delete call fails.
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    target_namespace = os.environ.get("AWS_ORBIT_USER_SPACE", team_space)
    orbit_jobs = _dynamic_client().resources.get(
        api_version=ORBIT_API_VERSION, group=ORBIT_API_GROUP, kind="OrbitJob"
    )
    try:
        orbit_jobs.delete(
            namespace=target_namespace,
            orphan_dependents=False,
            label_selector="k8sJobType=Job",
        )
    except ApiException as err:
        _logger.info("Exception when calling DynamicClient.delete() for OrbitJobs: %s\n" % err)
        raise err
def get_virtual_cluster_id() -> str:
    """
    Look up the id of the running EMR virtual cluster named
    ``orbit-<env>-<team space>``.

    Returns
    -------
    str
        The virtual cluster id, or an empty string when running virtual
        clusters exist but none carries the expected name.

    Raises
    ------
    Exception
        When the listing contains no virtual cluster at all.
    """
    emr = boto3.client("emr-containers")
    props = get_properties()
    env_name = props["AWS_ORBIT_ENV"]
    team_space = props["AWS_ORBIT_TEAM_SPACE"]
    response = emr.list_virtual_clusters(
        containerProviderId=f"orbit-{env_name}",
        containerProviderType="EKS",
        states=["RUNNING"],
        maxResults=500,
    )
    if not response.get("virtualClusters"):
        raise Exception("Virtual EMR Cluster not found")
    expected_name = f"orbit-{env_name}-{team_space}"
    for c in response["virtualClusters"]:
        if c["name"] == expected_name:
            return c["id"]
    # Only give up after every listed cluster has been inspected; returning
    # from inside the loop would stop at the first non-matching entry.
    return ""
def get_nodegroups(cluster_name: str):
    """
    Fetch the nodegroups of a cluster and attach launch-template block
    device mappings where available.

    The nodegroups are obtained by invoking the
    ``orbit-<AWS_ORBIT_ENV>-eks-service-handler`` Lambda with the cluster
    name. For each nodegroup carrying a ``launch_template`` entry, the
    matching launch template version is described through EC2 and its
    ``BlockDeviceMappings`` are stored under ``launch_template_data``.

    Parameters
    ----------
    cluster_name: str
        Cluster name sent to the Lambda as ``cluster_name``.

    Returns
    -------
    list
        The processed nodegroups. An empty list is returned when the Lambda
        response has a status code other than 200 or no payload.

    Raises
    ------
    Exception
        Any error raised while invoking the Lambda or decoding its payload
        is logged and re-raised. Errors in the per-nodegroup launch template
        lookup are only logged.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-mangled source; confirm the placement of the ``del`` and the
    ``append`` against the original file.
    """
    props = get_properties()
    env_name = props["AWS_ORBIT_ENV"]
    nodegroups_with_lt = []
    nodegroups: List[Dict[str, Any]] = []
    _logger.debug(f"Fetching cluster {cluster_name} nodegroups")
    try:
        response: Dict[str, Any] = boto3.client("lambda").invoke(
            FunctionName=f"orbit-{env_name}-eks-service-handler",
            InvocationType="RequestResponse",
            Payload=json.dumps({
                "cluster_name": cluster_name
            }).encode("utf-8"),
        )
        # An unusable response yields the (still empty) list instead of raising.
        if response.get("StatusCode") != 200 or response.get(
                "Payload") is None:
            _logger.error(f"Invalid Lambda response:\n{response}")
            return nodegroups
        nodegroups = json.loads(response["Payload"].read().decode("utf-8"))
    except Exception as ekse:
        _logger.error("Error invoking nodgroup lambda %s", ekse)
        raise ekse
    # Get launch template details per nodegroup
    ec2_client = boto3.client("ec2")
    for nodegroup in nodegroups:
        try:
            # ng aliases nodegroup, so the entry is modified in place.
            ng = nodegroup
            if "launch_template" in nodegroup:
                ltr_response = ec2_client.describe_launch_template_versions(
                    LaunchTemplateId=nodegroup["launch_template"]["id"],
                    Versions=[nodegroup["launch_template"]["version"]],
                )
                if ltr_response["LaunchTemplateVersions"]:
                    launch_template = ltr_response["LaunchTemplateVersions"][0]
                    ng["launch_template_data"] = launch_template[
                        "LaunchTemplateData"]["BlockDeviceMappings"]
                del ng["launch_template"]
            nodegroups_with_lt.append(ng)
        except Exception as lte:
            # A failing lookup drops this nodegroup from the result but does
            # not abort the loop.
            _logger.error(
                "Error invoking describe_launch_template_versions %s", lte)
    return nodegroups_with_lt
def _get_emr_functions() -> Dict[str, Any]:
    """
    Collect all EMR launch functions of the ``orbit-<env>-<team space>``
    namespace.

    Returns
    -------
    Dict[str, Any]
        The launch function descriptions keyed by their
        ``LaunchFunctionName``.
    """
    props = get_properties()
    env_name = props["AWS_ORBIT_ENV"]
    team_space = props["AWS_ORBIT_TEAM_SPACE"]
    namespace = f"orbit-{env_name}-{team_space}"

    # Fetch the first page once and reuse it.
    res = _get_functions(namespace)
    function_list = list(res["EMRLaunchFunctions"])
    while "NextToken" in res:
        # Rebind ``res`` on every iteration so the loop ends once a page
        # arrives without "NextToken"; leaving it unchanged loops forever.
        # NOTE(review): the token is passed positionally, as before; confirm
        # that this matches the signature of _get_functions.
        res = _get_functions(res["NextToken"])
        function_list.extend(res["EMRLaunchFunctions"])

    return {f["LaunchFunctionName"]: f for f in function_list}
def delete_pod(pod_name: str, grace_period_seconds: int = 30):
    """
    Delete a pod by name.

    The namespace is ``AWS_ORBIT_USER_SPACE`` from the environment, falling
    back to the team space from the properties.

    Parameters
    ----------
    pod_name: str
        Name of the pod to delete.
    grace_period_seconds: int
        Grace period forwarded to the delete call (default 30).

    Raises
    ------
    ApiException
        Re-raised after being logged when the Kubernetes call fails.
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    load_kube_config()
    core_api = CoreV1Api()
    try:
        core_api.delete_namespaced_pod(
            name=pod_name,
            namespace=os.environ.get("AWS_ORBIT_USER_SPACE", team_space),
            _preload_content=False,
            grace_period_seconds=grace_period_seconds,
            orphan_dependents=False,
        )
    except ApiException as err:
        _logger.info("Exception when calling CoreV1Api->delete_namespaced_pod: %s\n" % err)
        raise err
def delete_all_my_pods():
    """
    Delete every pod in the user namespace whose ``app`` label is one of
    ``APP_LABEL_SELECTOR``.

    The namespace is ``AWS_ORBIT_USER_SPACE`` from the environment, falling
    back to the team space from the properties.

    Raises
    ------
    ApiException
        Re-raised after being logged when the Kubernetes call fails.
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    target_namespace = os.environ.get("AWS_ORBIT_USER_SPACE", team_space)
    load_kube_config()
    core_api = CoreV1Api()
    # Set-based selector: "app in (a,b,...)".
    selector = "app in ({})".format(",".join(APP_LABEL_SELECTOR))
    try:
        core_api.delete_collection_namespaced_pod(
            namespace=target_namespace,
            _preload_content=False,
            orphan_dependents=False,
            label_selector=selector,
        )
    except ApiException as err:
        _logger.info("Exception when calling CoreV1Api->delete_collection_namespaced_pod: %s\n" % err)
        raise err
def _get_redshift_functions(self) -> Dict[str, Dict[str, str]]:
    """
    Return the Redshift start functions of this team with their
    configuration, discovered through ``Lambda.list_functions()``.

    A function is selected when its name contains
    ``<env>-<team space>`` and ``StartRedshift`` and its environment
    variables include ``RedshiftClusterParameterGroup``.

    Returns
    -------
    Dict[str, Dict[str, str]]
        Keyed by the part of the function name that follows
        ``StartRedshift-``; each value is the function's environment
        variables with the infrastructure-related entries removed.
    """
    lambda_client = boto3.client("lambda")
    props = get_properties()
    env_name = props["AWS_ORBIT_ENV"]
    team_space = props["AWS_ORBIT_TEAM_SPACE"]
    namespace = f"{env_name}-{team_space}"

    funcs = []
    list_kwargs: Dict[str, Any] = {}
    while True:
        response = lambda_client.list_functions(**list_kwargs)
        for func in response["Functions"]:
            if (
                namespace in func["FunctionName"]
                and "Environment" in func
                and "Variables" in func["Environment"]
                and "RedshiftClusterParameterGroup" in func["Environment"]["Variables"]
                and "StartRedshift" in func["FunctionName"]
            ):
                funcs.append(func)
        token = response.get("NextMarker")
        if not token:
            break
        # Feed the marker back into the next request; without it every
        # iteration re-reads the first page and the loop never ends.
        list_kwargs["Marker"] = token

    hidden_keys = (
        "RedshiftClusterParameterGroup",
        "RedshiftClusterSubnetGroup",
        "RedshiftClusterSecurityGroup",
        ORBIT_ENV,
        AWS_ORBIT_TEAM_SPACE,
        "SecretId",
        "PortNumber",
        "Role",
    )
    func_descs = {}
    for f in funcs:
        function_name = f["FunctionName"]
        user_name = function_name[function_name.index("StartRedshift") + len("StartRedshift-"):]
        # Work on a copy so the list_functions response is left untouched,
        # and tolerate functions that lack some of the hidden variables.
        func_desc = dict(f["Environment"]["Variables"])
        for key in hidden_keys:
            func_desc.pop(key, None)
        func_descs[user_name] = func_desc
    return func_descs
def list_current_pods(label_selector: str = None):
    """
    List the pods of the team namespace.

    Parameters
    ----------
    label_selector: str, optional
        Kubernetes label selector; omitted from the request when empty or
        None.

    Returns
    -------
    list
        The ``items`` entry of the decoded response, or an empty list when
        the response has no ``items`` key.

    Raises
    ------
    ApiException
        Re-raised after being logged when the Kubernetes call fails.
    """
    props = get_properties()
    team_name = props["AWS_ORBIT_TEAM_SPACE"]
    load_kube_config()
    api_instance = CoreV1Api()
    params = {"namespace": team_name, "_preload_content": False}
    if label_selector:
        params["label_selector"] = label_selector
    try:
        api_response = api_instance.list_namespaced_pod(**params)
        res = json.loads(api_response.data)
    except ApiException as e:
        # The message names the call that is actually made above.
        _logger.info("Exception when calling CoreV1Api->list_namespaced_pod: %s\n", e)
        raise
    return res.get("items", [])
def list_storage_pvc():
    """
    List the persistent volume claims of the user namespace.

    The namespace is ``AWS_ORBIT_USER_SPACE`` from the environment, falling
    back to the team space from the properties.

    Returns
    -------
    list
        The ``items`` entry of the decoded response, or an empty list when
        the response has no ``items`` key.

    Raises
    ------
    ApiException
        Re-raised after being logged when the Kubernetes call fails.
    """
    load_kube_config()
    core_api = CoreV1Api()
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    request_args = {
        "namespace": os.environ.get("AWS_ORBIT_USER_SPACE", team_space),
        "_preload_content": False,
    }
    try:
        raw = core_api.list_namespaced_persistent_volume_claim(**request_args)
        payload = json.loads(raw.data)
    except ApiException as err:
        _logger.info("Exception when calling CoreV1Api->list persistent volume claims: %s\n" % err)
        raise err
    return payload["items"] if "items" in payload else []
def delete_redshift_cluster(self, cluster_name: str) -> None:
    """
    Start the deletion of a Redshift cluster without a final snapshot.

    Parameters
    ----------
    cluster_name : str
        The Redshift cluster name given when creating the cluster. When its
        lower-cased form does not already contain the
        ``orbit-<env>-<team space>-`` prefix, the prefix is prepended to the
        lower-cased name.

    Returns
    -------
    None
        None.

    Example
    --------
    >>> from aws.utils.notebooks.database import RedshiftUtils
    >>> from aws.utils.notebooks.common import get_properties
    >>> RedshiftUtils.delete_redshift_cluster(cluster_name = "my_cluster")
    """
    props = get_properties()
    prefix = "orbit-{}-{}-".format(props["AWS_ORBIT_ENV"], props["AWS_ORBIT_TEAM_SPACE"])
    redshift = boto3.client("redshift")
    lowered = cluster_name.lower()
    if prefix in lowered:
        # Already qualified: the name is passed through exactly as given.
        identifier = cluster_name
    else:
        identifier = prefix + lowered
    result = redshift.delete_cluster(ClusterIdentifier=identifier, SkipFinalClusterSnapshot=True)
    if "errorMessage" not in result:
        logger.info("Cluster termination started")
    else:
        logger.error(result["errorMessage"])
def list_running_cronjobs():
    """
    List the active OrbitJobs labelled ``k8sJobType=CronJob`` in the user
    namespace.

    The namespace is ``AWS_ORBIT_USER_SPACE`` from the environment, falling
    back to the team space from the properties.

    Returns
    -------
    list
        The items whose ``status.orbitJobOperator.jobStatus`` equals
        ``"Active"``.

    Raises
    ------
    ApiException
        Re-raised after being logged when the lookup fails.
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    target_namespace = os.environ.get("AWS_ORBIT_USER_SPACE", team_space)
    orbit_jobs = _dynamic_client().resources.get(
        api_version=ORBIT_API_VERSION, group=ORBIT_API_GROUP, kind="OrbitJob"
    )
    try:
        listing = orbit_jobs.get(namespace=target_namespace, label_selector="k8sJobType=CronJob").to_dict()
    except ApiException as err:
        _logger.info("Exception when calling DynamicClient.get() for OrbitJobs: %s\n" % err)
        raise err

    active_jobs = []
    for orbit_job in listing.get("items", []):
        operator_status = orbit_job.get("status", {}).get("orbitJobOperator", {})
        if operator_status.get("jobStatus") == "Active":
            active_jobs.append(orbit_job)
    return active_jobs
def get_execution_history(notebookDir: str, notebookName: str) -> pd.DataFrame:
    """
    Return the execution history of a notebook.

    Delegates to ``_get_execution_history_from_local`` with the current
    properties.

    Parameters
    ----------
    notebookDir: str
        Name of notebook directory.
    notebookName: str
        Name of notebook.

    Returns
    -------
    df: pd.DataFrame
        Notebook execution history.

    Example
    --------
    >>> from aws_orbit_sdk import controller
    >>> controller.get_execution_history(notebookDir="notebook-directory", notebookName='mynotebook')
    """
    return _get_execution_history_from_local(notebookDir, notebookName, get_properties())
def schedule_task_eks(triggerName: str, frequency: str, taskConfiguration: dict) -> Any:
    """
    Create a scheduled OrbitJob for the given task.

    The namespace is ``AWS_ORBIT_USER_SPACE`` from the environment, falling
    back to the team space from the properties.

    Parameters
    ----------
    triggerName: str
        A unique name of the time trigger; the OrbitJob is named
        ``orbit-<namespace>-<triggerName>``.
    frequency: str
        Schedule string, stored unchanged as ``spec.schedule``.
    taskConfiguration: dict
        Task definition handed to ``_create_eks_job_spec``.

    Returns
    -------
    dict
        ``ExecutionType`` (always ``"eks"``) and ``Identifier`` (the name of
        the created object).

    Example
    -------
    Example for schedule_task similar to run_python and run_notebook tasks (refer to 'run_python').
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    namespace = os.environ.get("AWS_ORBIT_USER_SPACE", team_space)

    job_spec = _create_eks_job_spec(taskConfiguration)
    spec_section = job_spec["spec"]
    spec_section["schedule"] = frequency
    spec_section["triggerName"] = triggerName
    spec_section["notebookName"] = "schduled"
    job_spec["metadata"]["name"] = f"orbit-{namespace}-{triggerName}"

    orbit_jobs = _dynamic_client().resources.get(
        api_version=ORBIT_API_VERSION, group=ORBIT_API_GROUP, kind="OrbitJob"
    )
    created = orbit_jobs.create(namespace=namespace, body=job_spec).to_dict()
    created_name = created["metadata"]["name"]
    _logger.debug(f"started job {created_name}")
    return {"ExecutionType": "eks", "Identifier": created_name}
def _run_task_eks(taskConfiguration: dict) -> Any:
    """
    Submit the task as an OrbitJob.

    The namespace is ``AWS_ORBIT_USER_SPACE`` from the environment, falling
    back to the team space from the properties. The job's
    ``spec.notebookName`` is taken from the ``HOSTNAME`` environment
    variable (empty string when unset).

    Parameters
    ----------
    taskConfiguration: dict
        A task definition to execute.

    Returns
    -------
    dict
        ``ExecutionType``, ``Identifier`` (created object name), ``NodeType``
        and the ``tasks`` entry of the task configuration.
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    namespace = os.environ.get("AWS_ORBIT_USER_SPACE", team_space)
    node_type = get_node_type(taskConfiguration)

    job_spec = _create_eks_job_spec(taskConfiguration)
    job_spec["spec"]["notebookName"] = os.environ.get("HOSTNAME", "")
    # The generated name is prefixed with the team space, not the namespace.
    job_spec["metadata"]["generateName"] = f"orbit-{team_space}-{node_type}-runner-"

    orbit_jobs = _dynamic_client().resources.get(
        api_version=ORBIT_API_VERSION, group=ORBIT_API_GROUP, kind="OrbitJob"
    )
    created = orbit_jobs.create(namespace=namespace, body=job_spec).to_dict()
    created_name = created["metadata"]["name"]
    _logger.debug(f"started job {created_name}")
    return {
        "ExecutionType": "eks",
        "Identifier": created_name,
        "NodeType": node_type,
        "tasks": taskConfiguration["tasks"],
    }
def list_my_running_pods():
    """
    List the running pods of the user namespace.

    The namespace is ``AWS_ORBIT_USER_SPACE`` from the environment, falling
    back to the team space from the properties; the listing itself is
    delegated to ``list_running_pods``.
    """
    team_space = get_properties()["AWS_ORBIT_TEAM_SPACE"]
    user_namespace = os.environ.get("AWS_ORBIT_USER_SPACE", team_space)
    return list_running_pods(user_namespace)
def list_team_running_pods():
    """
    List the running pods of the team namespace.

    The namespace is the team space from the properties; the listing itself
    is delegated to ``list_running_pods``.
    """
    return list_running_pods(get_properties()["AWS_ORBIT_TEAM_SPACE"])