def set_secondary_vpc_cidr_env_vars() -> None:
    sh.run(
        "kubectl set env daemonset aws-node -n kube-system AWS_VPC_K8S_CNI_CUSTOM_NETWORK_CFG=true"
    )
    sh.run(
        "kubectl set env daemonset aws-node -n kube-system ENI_CONFIG_LABEL_DEF=topology.kubernetes.io/zone"
    )

def destroy_teams(context: "Context") -> None:
    stack_name: str = f"orbit-{context.name}"
    final_eks_stack_name: str = f"eksctl-{stack_name}-cluster"
    _logger.debug("EKSCTL stack name: %s", final_eks_stack_name)
    cluster_name = f"orbit-{context.name}"

    if cfn.does_stack_exist(stack_name=final_eks_stack_name) and context.teams:
        for team in context.teams:
            eks.delete_fargate_profile(
                profile_name=f"orbit-{context.name}-{team.name}",
                cluster_name=cluster_name,
            )

            username = f"orbit-{context.name}-{team.name}-runner"
            arn = f"arn:aws:iam::{context.account_id}:role/{username}"
            for line in sh.run_iterating(f"eksctl get iamidentitymapping --cluster {cluster_name} --arn {arn}"):
                if line == f'Error: no iamidentitymapping with arn "{arn}" found':
                    _logger.debug(f"Skipping non-existent IAM Identity Mapping - Role: {arn}")
                    break
            else:
                _logger.debug(f"Removing IAM Identity Mapping - Role: {arn}")
                sh.run(f"eksctl delete iamidentitymapping --cluster {cluster_name} --arn {arn}")

            username = f"orbit-{context.name}-{team.name}"
            arn = cast(str, team.eks_pod_role_arn)
            for line in sh.run_iterating(f"eksctl get iamidentitymapping --cluster {cluster_name} --arn {arn}"):
                if line == f'Error: no iamidentitymapping with arn "{arn}" found':
                    _logger.debug(f"Skipping non-existent IAM Identity Mapping - Role: {arn}")
                    break
            else:
                _logger.debug(f"Removing IAM Identity Mapping - Role: {arn}")
                sh.run(f"eksctl delete iamidentitymapping --cluster {cluster_name} --arn {arn}")

    _logger.debug("EKSCTL Teams destroyed")

def map_iam_identities(
    context: Context, cluster_name: str, eks_system_masters_roles_changes: Optional[ListChangeset]
) -> None:
    if eks_system_masters_roles_changes and eks_system_masters_roles_changes.added_values:
        for role in eks_system_masters_roles_changes.added_values:
            if iam.get_role(role) is None:
                _logger.debug(f"Skipping non-existent IAM Role: {role}")
                continue

            arn = f"arn:aws:iam::{context.account_id}:role/{role}"
            for line in sh.run_iterating(f"eksctl get iamidentitymapping --cluster {cluster_name} --arn {arn}"):
                if line.startswith("Error: no iamidentitymapping with arn"):
                    _logger.debug(f"Adding IAM Identity Mapping - Role: {arn}, Username: {role}, Group: system:masters")
                    sh.run(
                        f"eksctl create iamidentitymapping --cluster {cluster_name} --arn {arn} "
                        f"--username {role} --group system:masters"
                    )
                    cast(List[str], context.eks_system_masters_roles).append(role)
                    ContextSerDe.dump_context_to_ssm(context=context)
                    break
            else:
                _logger.debug(f"Skip adding existing IAM Identity Mapping - Role: {arn}")

    if eks_system_masters_roles_changes and eks_system_masters_roles_changes.removed_values:
        for role in eks_system_masters_roles_changes.removed_values:
            arn = f"arn:aws:iam::{context.account_id}:role/{role}"
            for line in sh.run_iterating(f"eksctl get iamidentitymapping --cluster {cluster_name} --arn {arn}"):
                if line.startswith("Error: no iamidentitymapping with arn"):
                    _logger.debug(f"Skip removing non-existent IAM Identity Mapping - Role: {arn}")
                    break
            else:
                _logger.debug(f"Removing IAM Identity Mapping - Role: {arn}")
                sh.run(f"eksctl delete iamidentitymapping --cluster {cluster_name} --arn {arn} --all")
                cast(List[str], context.eks_system_masters_roles).remove(role)
                ContextSerDe.dump_context_to_ssm(context=context)

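# Aside (illustration only, not part of the original modules): the "for ... else" loops above rely
# on Python's loop-else semantics -- the "else" suite runs only when the loop finishes without
# hitting "break", i.e. when eksctl never printed the "no iamidentitymapping with arn" error and
# the mapping therefore already exists. A minimal, self-contained sketch of the pattern; the
# helper name and sample text are assumptions used only to show the control flow:
def _mapping_missing(output_lines: List[str]) -> bool:
    for line in output_lines:
        if line.startswith("Error: no iamidentitymapping with arn"):
            break  # an error line short-circuits the loop and skips the else suite
    else:
        return False  # loop completed without break: no error line, the mapping exists
    return True  # reached only via break: the mapping is missing
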
def destroy_kubeflow(context: Context) -> None:
    stack_name: str = f"orbit-{context.name}"
    final_eks_stack_name: str = f"eksctl-{stack_name}-cluster"
    _logger.debug("EKSCTL stack name: %s", final_eks_stack_name)

    if cfn.does_stack_exist(stack_name=final_eks_stack_name):
        kubectl.write_kubeconfig(context=context)

        for line in sh.run_iterating("kubectl get namespace kubeflow"):
            if '"kubeflow" not found' in line:
                return

        cluster_name = f"orbit-{context.name}"
        output_path = os.path.join(".orbit.out", context.name, "kubeflow", cluster_name)
        gen_kubeflow_config(context, output_path, cluster_name)

        _logger.debug("Destroying Kubeflow")
        output_path = os.path.abspath(output_path)
        _logger.debug(f"kubeflow config dir: {output_path}")
        utils.print_dir(output_path)

        timeouts = 0
        while timeouts < 3:
            try:
                _logger.info("Deleting kubeflow resources")
                sh.run("./delete_kf.sh", cwd=output_path)
                break  # deletion succeeded, stop retrying
            except FailedShellCommand:
                _logger.info("The command returned a non-zero exit code. Retrying resource deletion")
                timeouts += 1
                time.sleep(300)

def install_chart(repo: str, namespace: str, name: str, chart_name: str, chart_version: str) -> None:
    # The passed-in chart_version is overridden with the version derived from the CLI package.
    chart_version = aws_orbit.__version__.replace(".dev", "-")
    _logger.debug("Installing %s, version %s as %s from %s", chart_name, chart_version, name, repo)
    sh.run(
        f"helm upgrade --install --debug --namespace {namespace} --version "
        f"{chart_version} {name} {repo}/{chart_name}"
    )

def _prepare_demo_data(bucket_name: str) -> None:
    _logger.debug("Adding CMS data sets")
    cms_files: List[str] = [
        "https://www.cms.gov/Research-Statistics-Data-and-Systems/Downloadable-Public-Use-Files/SynPUFs/Downloads/DE1_0_2008_Beneficiary_Summary_File_Sample_1.zip",  # noqa
        "http://downloads.cms.gov/files/DE1_0_2008_to_2010_Carrier_Claims_Sample_1A.zip",
        "http://downloads.cms.gov/files/DE1_0_2008_to_2010_Carrier_Claims_Sample_1B.zip",
        "https://www.cms.gov/Research-Statistics-Data-and-Systems/Downloadable-Public-Use-Files/SynPUFs/Downloads/DE1_0_2008_to_2010_Inpatient_Claims_Sample_1.zip",  # noqa
        "https://www.cms.gov/Research-Statistics-Data-and-Systems/Downloadable-Public-Use-Files/SynPUFs/Downloads/DE1_0_2008_to_2010_Outpatient_Claims_Sample_1.zip",  # noqa
        "http://downloads.cms.gov/files/DE1_0_2008_to_2010_Prescription_Drug_Events_Sample_1.zip",
        "https://www.cms.gov/Research-Statistics-Data-and-Systems/Downloadable-Public-Use-Files/SynPUFs/Downloads/DE1_0_2009_Beneficiary_Summary_File_Sample_1.zip",  # noqa
        "https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/SynPUFs/Downloads/DE1_0_2010_Beneficiary_Summary_File_Sample_20.zip",  # noqa
    ]
    _download_demo_data(bucket_name=bucket_name, bucket_key_prefix="landing/data/cms/", download_files=cms_files)

    _logger.debug("Adding SageMaker regression notebooks data sets")
    sagemaker_files: List[str] = [
        "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data",
        "https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz",
    ]
    _download_demo_data(
        bucket_name=bucket_name, bucket_key_prefix="landing/data/sagemaker/", download_files=sagemaker_files
    )

    _logger.debug("Adding CMS schema files")
    cms_schema_files = os.path.join(ORBIT_CLI_ROOT, "data", "cms", "schema")
    schema_key_prefix = "landing/cms/schema/"
    sh.run(f"aws s3 cp --recursive {cms_schema_files} s3://{bucket_name}/{schema_key_prefix}")

def build(
    account_id: str,
    region: str,
    dir: str,
    name: str,
    tag: str = "latest",
    use_cache: bool = True,
    pull: bool = False,
    build_args: Optional[List[str]] = None,
) -> None:
    ecr_address = f"{account_id}.dkr.ecr.{region}.amazonaws.com"
    repo_address = f"{ecr_address}/{name}"
    repo_address_tag = f"{repo_address}:{tag}"
    cache_str: str = ""
    pull_str: str = "--pull" if pull else ""
    build_args_str = " ".join([f"--build-arg {ba}" for ba in build_args]) if build_args else ""
    if use_cache:
        try:
            ecr_pull(name=repo_address, tag=tag)
            cache_str = f"--cache-from {repo_address_tag}"
        except exceptions.FailedShellCommand:
            _logger.debug(f"Docker cache not found at ECR {name}:{tag}")
    sh.run(f"docker build {pull_str} {cache_str} {build_args_str} --tag {name}:{tag} .", cwd=dir)

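# Illustration (the sample values are assumptions used only to show the string assembly): for
# account_id="123456789012", region="us-east-1", name="jupyter-user", tag="latest", pull=True,
# build_args=["BASE_IMAGE=python:3.8"], and a successful cache pull, the assembled command is
#
#   docker build --pull --cache-from 123456789012.dkr.ecr.us-east-1.amazonaws.com/jupyter-user:latest \
#       --build-arg BASE_IMAGE=python:3.8 --tag jupyter-user:latest .
#
# executed with cwd set to "dir". If the cache pull fails, --cache-from is simply omitted.
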
def package_chart(repo: str, chart_path: str, values: Optional[Dict[str, Any]]) -> Tuple[str, str, str]:
    chart_yaml = os.path.join(chart_path, "Chart.yaml")
    values_yaml = os.path.join(chart_path, "values.yaml")

    chart_version = aws_orbit.__version__.replace(".dev", "-")
    chart = yaml.safe_load(
        update_file(chart_yaml, {"orbit_version": aws_orbit.__version__, "chart_version": chart_version})
    )
    chart_version = chart["version"]

    if values:
        update_file(values_yaml, values)

    chart_name = chart_path.split("/")[-1]
    _logger.debug("Packaging %s at %s", chart_name, chart_path)
    for line in sh.run_iterating(f"helm package --debug {chart_path}"):
        if line.startswith("Successfully packaged chart and saved it to: "):
            chart_package = line.replace("Successfully packaged chart and saved it to: ", "")
            _logger.debug("Created package: %s", chart_package)

    _logger.debug("Pushing %s to %s repository", chart_package, repo)
    sh.run(f"helm s3 push --force {chart_package} {repo}")
    return chart_name, chart_version, chart_package

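# Illustration (the version strings are assumptions about the layout, not taken from the original
# module): a dev build of the CLI such as "1.3.0.dev7" becomes the pre-release chart version
# "1.3.0-7" after .replace(".dev", "-"), which fits Helm's SemVer-style chart versioning where a
# pre-release segment is introduced by "-"; a plain release like "1.3.0" is left unchanged.
assert "1.3.0.dev7".replace(".dev", "-") == "1.3.0-7"
assert "1.3.0".replace(".dev", "-") == "1.3.0"
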
def prepareAndValidateNotebooks(default_output_directory, notebooks):
    cc_region = os.environ.get("AWS_DEFAULT_REGION")

    # Collect all CodeCommit repos referenced by the tasks' source paths
    cc_repo_list = set(
        [
            task["sourcePath"].split("/")[0]
            for task in notebooks["tasks"]
            if task["sourcePath"] and "codecommit::" in task["sourcePath"]
        ]
    )
    logger.info(f"cc_repo_list={cc_repo_list}")

    # Clone each CodeCommit repo into a folder named after the repo
    for cc_repo in cc_repo_list:
        repo_path = cc_repo.replace("::", f"::{cc_region}://")
        repo_name = cc_repo.split("::")[-1]
        logger.info(f"Cloning {repo_path}")
        sh.run(f"git clone {repo_path} /tmp/{repo_name}/")

    reportsToRun = []
    id = 1
    for notebook in notebooks["tasks"]:
        key = "e{}".format(str(id))
        id += 1
        reportToRun = prepareNotebook(default_output_directory, notebook, key)
        reportsToRun.append(reportToRun)

    return reportsToRun

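# Illustration (the sourcePath value and region are assumptions used only to show the string
# handling): a task with sourcePath "codecommit::my-notebooks/demo/run.ipynb" and
# AWS_DEFAULT_REGION=us-east-1 is cloned into /tmp/my-notebooks/ via the git-remote-codecommit
# URL scheme (codecommit::<region>://<repo>).
_cc_repo = "codecommit::my-notebooks/demo/run.ipynb".split("/")[0]
assert _cc_repo == "codecommit::my-notebooks"
assert _cc_repo.replace("::", "::us-east-1://") == "codecommit::us-east-1://my-notebooks"
assert _cc_repo.split("::")[-1] == "my-notebooks"
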
def destroy(plugin_id: str, context: "Context", team_context: "TeamContext", parameters: Dict[str, Any]) -> None:
    _logger.debug("Running emr_on_eks destroy!")
    sh.run(f"echo 'Team name: {team_context.name} | Plugin ID: {plugin_id}'")

    virtual_cluster_name = f"orbit-{context.name}-{team_context.name}"
    emr = boto3.client("emr-containers")
    response = emr.list_virtual_clusters(
        containerProviderId=f"orbit-{context.name}",
        containerProviderType="EKS",
        maxResults=500,
    )
    if "virtualClusters" in response:
        for c in response["virtualClusters"]:
            if c["name"] == virtual_cluster_name:
                try:
                    delete_response = emr.delete_virtual_cluster(id=c["id"])
                    _logger.debug("delete_virtual_cluster: %s", delete_response)
                except Exception as e:
                    _logger.warning(e)

    cdk_destroy(
        stack_name=f"orbit-{context.name}-{team_context.name}-emr-on-eks",
        app_filename=os.path.join(ORBIT_EMR_ON_EKS_ROOT, "cdk.py"),
        context=context,
        team_context=team_context,
        parameters=parameters,
    )

def login_v2(account_id: str, region: str) -> None:
    username, password = ecr.get_credential()
    ecr_address = f"{account_id}.dkr.ecr.{region}.amazonaws.com"
    sh.run(
        f"docker login --username {username} --password {password} {ecr_address}",
        hide_cmd=True,
    )
    _logger.debug("ECR logged in.")

def create_cluster_autoscaler_service_account(context: Context) -> None:
    policy_arn = f"arn:aws:iam::{context.account_id}:policy/orbit-{context.name}-cluster-autoscaler-policy"
    _logger.debug(f"Creating ClusterAutoscaler ServiceAccount with Policy ARN: {policy_arn}")
    sh.run(
        f"eksctl create iamserviceaccount --cluster=orbit-{context.name} --namespace=kube-system "
        f"--name=cluster-autoscaler --attach-policy-arn={policy_arn} --override-existing-serviceaccounts "
        "--approve"
    )

def tag_image(account_id: str, region: str, name: str, tag: str = "latest") -> None:
    ecr_address = f"{account_id}.dkr.ecr.{region}.amazonaws.com"
    remote_name = f"{ecr_address}/{name}"
    _logger.debug(f"Tagging {name}:{tag} as {remote_name}:{tag}")
    sh.run(f"docker tag {name}:{tag} {remote_name}:{tag}")

def tag_image(context: "Context", remote_name: str, remote_source: str, name: str, tag: str = "latest") -> None:
    ecr_address = f"{context.account_id}.dkr.ecr.{context.region}.amazonaws.com"
    if remote_source == "ecr" and not remote_name.startswith("public.ecr.aws"):
        remote_name = f"{ecr_address}/{remote_name}"
    sh.run(f"docker tag {remote_name}:{tag} {ecr_address}/{name}:{tag}")

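# Illustration (the sample names are assumptions): with remote_source="ecr" and a non-public
# remote_name such as "shared-images/base", the source is expanded to
# "<account>.dkr.ecr.<region>.amazonaws.com/shared-images/base:latest" before tagging, while a
# "public.ecr.aws/..." remote_name or a non-"ecr" remote_source is tagged exactly as given.
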
def delete_istio_pod_disruption_budget(context: Context) -> None:
    _logger.debug("Deleting istio-system pod disruption budgets")
    try:
        sh.run(
            "bash -c 'for pdb in `kubectl get poddisruptionbudgets -n istio-system -o custom-columns=:metadata.name`; "
            "do kubectl delete poddisruptionbudgets $pdb -n istio-system; done'"
        )
    except:  # noqa: E722
        _logger.info("Unable to remove istio-system PDB")

def deploy_toolkit(context: T) -> None:
    if not (isinstance(context, Context) or isinstance(context, FoundationContext)):
        raise ValueError("Unknown 'context' Type")
    cmd: str = (
        f"cdk bootstrap --toolkit-bucket-name {context.cdk_toolkit.s3_bucket} "
        f"--toolkit-stack-name {context.cdk_toolkit.stack_name} "
        f"{get_output_argument(context, context.cdk_toolkit.stack_name)} "
        f"aws://{context.account_id}/{context.region}"
    )
    sh.run(cmd=cmd)

def deploy(context: T, stack_name: str, app_filename: str, args: List[str]) -> None:
    if not (isinstance(context, Context) or isinstance(context, FoundationContext)):
        raise ValueError("Unknown 'context' Type")
    cmd: str = (
        "cdk deploy --require-approval never --progress events "
        f"--toolkit-stack-name {context.cdk_toolkit.stack_name} "
        f"{get_app_argument(app_filename, args)} "
        f"{get_output_argument(context, stack_name)}"
    )
    sh.run(cmd=cmd)

def init_env_repo(context: Context) -> str:
    repo_location = f"s3://{context.toolkit.s3_bucket}/helm/repositories/env"

    if not s3.object_exists(bucket=cast(str, context.toolkit.s3_bucket), key="helm/repositories/env/index.yaml"):
        _logger.debug("Initializing Env Helm Repository at %s", repo_location)
        sh.run(f"helm s3 init {repo_location}")
    else:
        _logger.debug("Skipping initialization of existing Env Helm Repository at %s", repo_location)

    context.helm_repository = repo_location
    return repo_location

def destroy(plugin_id: str, context: "Context", team_context: "TeamContext", parameters: Dict[str, Any]) -> None:
    _logger.debug("Deleting Plugin %s | Env name: %s | Team name: %s", plugin_id, context.name, team_context.name)
    sh.run(f"kubectl delete jobs/team-script-{plugin_id} --namespace {team_context.name} --ignore-not-found")
    sh.run(f"kubectl delete configmap {plugin_id}-script --namespace {team_context.name} --ignore-not-found")

def deploy_team(context: "Context", team_context: "TeamContext") -> None:
    eks_stack_name: str = f"eksctl-orbit-{context.name}-cluster"
    _logger.debug("EKSCTL stack name: %s", eks_stack_name)
    if cfn.does_stack_exist(stack_name=eks_stack_name):
        k8s_context = get_k8s_context(context=context)
        _logger.debug("kubectl context: %s", k8s_context)
        output_path = _generate_team_context(context=context, team_context=team_context)
        output_path = _generate_env_manifest(context=context, clean_up=False)
        sh.run(f"kubectl apply -f {output_path} --context {k8s_context} --wait")
        fetch_kubectl_data(context=context, k8s_context=k8s_context, include_teams=True)

def deploy_team(context: "Context", team_context: "TeamContext") -> None:
    eks_stack_name: str = f"eksctl-orbit-{context.name}-cluster"
    _logger.debug("EKSCTL stack name: %s", eks_stack_name)
    if cfn.does_stack_exist(stack_name=eks_stack_name):
        k8s_context = get_k8s_context(context=context)
        _logger.debug("kubectl context: %s", k8s_context)
        output_path = _generate_team_context(context=context, team_context=team_context)
        sh.run(f"kubectl apply -f {output_path} --context {k8s_context} --wait")

def destroy(plugin_id: str, context: "Context", team_context: "TeamContext", parameters: Dict[str, Any]) -> None:
    _logger.debug("Destroying Redshift plugin resources for team %s", team_context.name)
    sh.run(f"echo 'Team name: {team_context.name} | Plugin ID: {plugin_id}'")
    cdk_destroy(
        stack_name=f"orbit-{context.name}-{team_context.name}-{plugin_id}-redshift",
        app_filename=os.path.join(PLUGIN_ROOT_PATH, "orbit_redshift_stack.py"),
        context=context,
        team_context=team_context,
        parameters=parameters,
    )

def destroy_team_user_resources(team_name: str) -> None:
    try:
        sh.run(
            f"bash -c 'for ns in $(kubectl get namespaces --output=jsonpath={{.items..metadata.name}} "
            f"-l orbit/team={team_name},orbit/space=team); "
            f"do kubectl delete teamspace $ns -n $ns --force; "
            f"done'"
        )
    except FailedShellCommand:
        _logger.error("Failed to execute command to delete teamspace objects for team %s", team_name)

def destroy_env(context: "Context") -> None:
    stack_name: str = f"orbit-{context.name}"
    final_eks_stack_name: str = f"eksctl-{stack_name}-cluster"
    _logger.debug("EKSCTL stack name: %s", final_eks_stack_name)
    if cfn.does_stack_exist(stack_name=final_eks_stack_name):
        revoke_cluster_pod_security_group(context=context)
        sh.run(f"eksctl utils write-kubeconfig --cluster orbit-{context.name} --set-kubeconfig-context")
        output_filename = generate_manifest(context=context, name=stack_name, nodegroups=context.managed_nodegroups)
        sh.run(f"eksctl delete cluster -f {output_filename} --wait --verbose 4")
    _logger.debug("EKSCTL Environment destroyed")

def destroy(plugin_id: str, context: "Context", team_context: "TeamContext", parameters: Dict[str, Any]) -> None:
    _logger.debug("Running hello_world destroy!")
    sh.run(f"echo 'Team name: {team_context.name} | Plugin ID: {plugin_id}'")
    cdk_destroy(
        stack_name=f"orbit-{context.name}-{team_context.name}-hello",
        app_filename=os.path.join(PLUGIN_ROOT_PATH, "hello_cdk.py"),
        context=context,
        team_context=team_context,
        parameters=parameters,
    )

def associate_open_id_connect_provider(context: Context, cluster_name: str) -> None:
    if (
        iam.get_open_id_connect_provider(
            account_id=context.account_id, open_id_connect_provider_id=cast(str, context.eks_oidc_provider)
        )
        is None
    ):
        _logger.debug("Associating OpenID Connect Provider")
        sh.run(f"eksctl utils associate-iam-oidc-provider --cluster {cluster_name} --approve")
    else:
        _logger.debug("OpenID Connect Provider already associated")

def install_chart(repo: str, namespace: str, name: str, chart_name: str, chart_version: str) -> None:
    # The passed-in chart_version is overridden with the version derived from the CLI package.
    chart_version = aws_orbit.__version__.replace(".dev", "-")
    _logger.debug("Installing %s, version %s as %s from %s", chart_name, chart_version, name, repo)

    try:
        sh.run(f"helm uninstall --debug {name} -n {namespace}")
    except exceptions.FailedShellCommand:
        _logger.debug("helm uninstall did not find the release")

    sh.run(
        f"helm upgrade --install --debug --namespace {namespace} --version "
        f"{chart_version} {name} {repo}/{chart_name}"
    )

def destroy_team(context: Context, team_context: TeamContext) -> None:
    eks_stack_name: str = f"eksctl-orbit-{context.name}-cluster"
    _logger.debug("EKSCTL stack name: %s", eks_stack_name)
    if cfn.does_stack_exist(stack_name=eks_stack_name) and team_context.team_helm_repository:
        kubectl.write_kubeconfig(context=context)
        uninstall_all_charts(namespace=team_context.name)
        if team_context.team_helm_repository:
            # Delete the Helm charts for the team repository
            sh.run(f"aws s3 rm --recursive {team_context.team_helm_repository}")
        if team_context.user_helm_repository:
            # Delete the Helm charts for the user repository
            sh.run(f"aws s3 rm --recursive {team_context.user_helm_repository}")

def ecr_pull_external(context: "Context", repository: str, tag: str = "latest") -> None:
    parts: List[str] = repository.split(".")
    if len(parts) < 6:
        raise ValueError(f"Invalid External ECR Repository: {repository}")
    external_account_id: str = parts[0]
    external_region: str = parts[3]
    login_ecr_only(context=context, account_id=external_account_id, region=external_region)
    sh.run(f"docker pull {repository}:{tag}")

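# Illustration (the repository value is an assumption used only to show the parsing): the external
# account id and region are read positionally from the registry hostname, which is why fewer than
# six dot-separated parts is rejected as an invalid external ECR repository.
_parts = "123456789012.dkr.ecr.eu-west-1.amazonaws.com/shared-base".split(".")
assert _parts[0] == "123456789012" and _parts[3] == "eu-west-1"
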
def destroy_team(context: "Context", team_context: "TeamContext") -> None:
    eks_stack_name: str = f"eksctl-orbit-{context.name}-cluster"
    _logger.debug("EKSCTL stack name: %s", eks_stack_name)
    if cfn.does_stack_exist(stack_name=eks_stack_name):
        k8s_context = get_k8s_context(context=context)
        _logger.debug("kubectl k8s_context: %s", k8s_context)
        _logger.debug("Attempting kubectl delete for team %s", team_context.name)
        output_path = _generate_team_context(context=context, team_context=team_context)
        sh.run(
            f"kubectl delete -f {output_path} --grace-period=0 --force "
            f"--ignore-not-found --wait --context {k8s_context}"
        )