def eks(ctx, profile):
    check_kubernetes_client(ctx)
    # Greet the user!
    echo(
        "Welcome to Metaflow! Follow the prompts to configure your "
        "installation.\n",
        bold=True,
    )
    check_kubernetes_config(ctx)
    # Check for existing configuration.
    if not overwrite_config(profile):
        ctx.abort()

    verify_aws_credentials(ctx)

    existing_env = get_env(profile)
    env = existing_env.copy()

    if existing_env.get("METAFLOW_DEFAULT_DATASTORE") == "s3":
        # Skip S3 configuration if it is already configured.
        pass
    elif not existing_env.get("METAFLOW_DEFAULT_DATASTORE"):
        env.update(configure_s3_datastore(existing_env))
    else:
        # If configured to use something else, offer to switch to S3.
        click.confirm(
            "\nMetaflow on EKS needs to use S3 as a datastore, "
            + "but your existing configuration is not using S3. "
            + "Would you like to reconfigure it to use S3?",
            default=True,
            abort=True,
        )
        env.update(configure_s3_datastore(existing_env))

    # Configure remote metadata.
    if existing_env.get("METAFLOW_DEFAULT_METADATA") == "service":
        # Skip metadata service configuration if it is already configured.
        pass
    else:
        if click.confirm(
            "\nMetaflow can use a "
            + yellow("remote Metadata Service to track")
            + " and persist flow execution metadata.\nWould you like to "
            "configure the Metadata Service?",
            default=True,
            abort=False,
        ):
            env.update(configure_metadata_service(existing_env))

    # Configure AWS EKS for compute.
    env.update(configure_eks(existing_env))

    persist_env({k: v for k, v in env.items() if v}, profile)

def pull(episode):
    tutorials_dir = get_tutorials_dir()

    if not episode:
        episodes = get_all_episodes()
    else:
        episodes = [episode]

    # Validate the requested episodes.
    for episode in episodes:
        validate_episode(episode)

    # Create destination `metaflow-tutorials` dir.
    dst_parent = os.path.join(os.getcwd(), "metaflow-tutorials")
    makedirs(dst_parent)

    # Pull specified episodes.
    for episode in episodes:
        dst_dir = os.path.join(dst_parent, episode)
        # Check if episode has already been pulled before.
        if os.path.exists(dst_dir):
            if click.confirm(
                "Episode "
                + click.style('"{0}"'.format(episode), fg="red")
                + " has already been pulled before. Do you wish "
                "to delete the existing version?"
            ):
                shutil.rmtree(dst_dir)
            else:
                continue
        echo("Pulling episode ", nl=False)
        echo('"{0}"'.format(episode), fg="cyan", nl=False)
        # TODO: Is the following redundant?
        echo(" into your current working directory.")
        # Copy from (local) metaflow package dir to current.
        src_dir = os.path.join(tutorials_dir, episode)
        shutil.copytree(src_dir, dst_dir)

    echo("\nTo know more about an episode, type:\n", nl=False)
    echo("metaflow tutorials info [EPISODE]", fg="cyan")

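# NOTE: `makedirs`, `get_all_episodes`, and `validate_episode` are called above
# but not defined in this file. The following is a minimal sketch of what they
# might look like, assuming tutorials ship as subdirectories of the installed
# metaflow package (directory layout and error types are assumptions here).
def makedirs(path):
    # Like os.makedirs, but tolerate the directory already existing.
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise


def get_all_episodes():
    # Assumes each episode is a subdirectory of the tutorials directory,
    # conventionally named "00-helloworld", "01-playlist", etc.
    tutorials_dir = get_tutorials_dir()
    return sorted(
        d
        for d in os.listdir(tutorials_dir)
        if os.path.isdir(os.path.join(tutorials_dir, d))
    )


def validate_episode(episode):
    # Reject episode names that don't correspond to a shipped tutorial.
    if episode not in get_all_episodes():
        raise click.BadArgumentUsage(
            'Episode "%s" does not exist.' % episode
        )
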
def reset(profile):
    check_for_missing_profile(profile)
    path = get_config_path(profile)
    if os.path.exists(path):
        if click.confirm(
            "Do you really wish to reset the configuration in "
            + click.style('"%s"' % path, fg="cyan"),
            abort=True,
        ):
            os.remove(path)
            echo("Configuration successfully reset to run locally.")
    else:
        echo("Configuration is already reset to run locally.")

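# NOTE: `get_config_path` and `check_for_missing_profile` are assumed helpers
# (they are used above but not defined in this file). A minimal sketch,
# assuming profiles are stored as JSON files under ~/.metaflowconfig/ and the
# METAFLOW_HOME environment variable can override that location:
def get_config_path(profile):
    # Default profile lives in config.json; named profiles in config_<name>.json.
    config_file = "config.json" if not profile else "config_%s.json" % profile
    home = os.environ.get("METAFLOW_HOME", "~/.metaflowconfig")
    return expanduser(os.path.join(home, config_file))


def check_for_missing_profile(profile):
    # Abort early if a named profile was requested but never configured.
    if profile and not os.path.exists(get_config_path(profile)):
        raise click.ClickException(
            "Couldn't find a configuration for profile %s." % profile
        )
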
def check_kubernetes_config(ctx):
    from kubernetes import config

    try:
        all_contexts, current_context = config.list_kube_config_contexts()
        click.confirm(
            "You have a valid kubernetes configuration. The current context "
            "is set to "
            + yellow(current_context["name"])
            + ". Proceed?",
            default=True,
            abort=True,
        )
    except config.config_exception.ConfigException as e:
        click.confirm(
            "\nYou don't seem to have a valid kubernetes configuration file. "
            + "The error from the kubernetes client library: "
            + red(str(e))
            + ". "
            + "To create a kubernetes configuration for EKS, you typically "
            + "need to run "
            + yellow("aws eks update-kubeconfig --name <CLUSTER NAME>")
            + ". For further details, refer to the AWS documentation at "
            + "https://docs.aws.amazon.com/eks/latest/userguide/create-kubeconfig.html\n"
            "Do you want to proceed with configuring Metaflow for EKS anyway?",
            default=False,
            abort=True,
        )

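# NOTE: `check_kubernetes_client` is called from `eks` above but not defined in
# this file. A plausible sketch, assuming it only verifies that the `kubernetes`
# Python client is importable before any kubeconfig checks run:
def check_kubernetes_client(ctx):
    try:
        import kubernetes  # noqa: F401
    except ImportError:
        echo(
            "Could not import the 'kubernetes' Python package. "
            "Install it (for example with 'pip install kubernetes') "
            "and re-run 'metaflow configure eks'.",
            fg="red",
        )
        ctx.abort()
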
def configure_datastore_and_metadata(existing_env):
    empty_profile = False
    if not existing_env:
        empty_profile = True
    env = {}

    # Configure Amazon S3 as the datastore.
    use_s3_as_datastore = click.confirm(
        "\nMetaflow can use "
        + yellow("Amazon S3 as the storage backend")
        + " for all code and data artifacts on "
        + "AWS.\nAmazon S3 is a strict requirement if you "
        + "intend to execute your flows on AWS Batch "
        + "and/or schedule them on AWS Step "
        + "Functions.\nWould you like to configure Amazon "
        + "S3 as the default storage backend?",
        default=empty_profile
        or existing_env.get("METAFLOW_DEFAULT_DATASTORE", "") == "s3",
        abort=False,
    )
    if use_s3_as_datastore:
        env.update(configure_s3_datastore(existing_env))

    # Configure Metadata service for tracking.
    if click.confirm(
        "\nMetaflow can use a "
        + yellow("remote Metadata Service to track")
        + " and persist flow execution metadata.\nConfiguring the "
        "service is a requirement if you intend to schedule your "
        "flows with AWS Step Functions.\nWould you like to "
        "configure the Metadata Service?",
        default=empty_profile
        or existing_env.get("METAFLOW_DEFAULT_METADATA", "") == "service"
        or "METAFLOW_SFN_IAM_ROLE" in env,
        abort=False,
    ):
        env.update(configure_metadata_service(existing_env))
    return env

def overwrite_config(profile):
    path = get_config_path(profile)
    if os.path.exists(path):
        if not click.confirm(
            click.style(
                "We found an existing configuration for your "
                + "profile. Do you want to modify the existing "
                + "configuration?",
                fg="red",
                bold=True,
            )
        ):
            echo(
                "You can configure a different named profile by using the "
                "--profile argument. You can activate this profile by setting "
                "the environment variable METAFLOW_PROFILE to the named "
                "profile.",
                fg="yellow",
            )
            return False
    return True

def verify_aws_credentials(ctx):
    # Verify that the user has configured AWS credentials on their computer.
    if not click.confirm(
        "\nMetaflow relies on "
        + yellow("AWS access credentials")
        + " present on your computer to access resources on AWS."
        "\nBefore proceeding further, please confirm that you "
        "have already configured these access credentials on "
        "this computer.",
        default=True,
    ):
        echo(
            "There are many ways to set up your AWS access credentials. You "
            "can get started by following this guide: ",
            nl=False,
            fg="yellow",
        )
        echo(
            "https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html",
            fg="cyan",
        )
        ctx.abort()

def aws(ctx, profile):
    # Greet the user!
    echo(
        "Welcome to Metaflow! Follow the prompts to configure your "
        "installation.\n",
        bold=True,
    )
    # Check for existing configuration.
    if not overwrite_config(profile):
        ctx.abort()

    verify_aws_credentials(ctx)

    existing_env = get_env(profile)
    empty_profile = False
    if not existing_env:
        empty_profile = True

    env = {}
    env.update(configure_datastore_and_metadata(existing_env))

    # Configure AWS Batch for compute if using S3.
    if env.get("METAFLOW_DEFAULT_DATASTORE") == "s3":
        if click.confirm(
            "\nMetaflow can scale your flows by "
            + yellow("executing your steps on AWS Batch")
            + ".\nAWS Batch is a strict requirement if you intend "
            "to schedule your flows on AWS Step Functions.\nWould "
            "you like to configure AWS Batch as your compute "
            "backend?",
            default=empty_profile or "METAFLOW_BATCH_JOB_QUEUE" in existing_env,
            abort=False,
        ):
            env.update(configure_aws_batch(existing_env))

    persist_env({k: v for k, v in env.items() if v}, profile)

def export(profile, output_filename):
    check_for_missing_profile(profile)
    # Export its contents to a new file.
    path = get_config_path(profile)
    env_dict = {}
    if os.path.exists(path):
        with open(path, "r") as f:
            env_dict = json.load(f)
    # resolve_path doesn't expand `~` in `path`.
    output_path = expanduser(output_filename)
    if os.path.exists(output_path):
        if click.confirm(
            "Do you wish to overwrite the contents in "
            + click.style('"%s"' % output_path, fg="cyan")
            + "?",
            abort=True,
        ):
            pass
    # Write to file.
    with open(output_path, "w") as f:
        json.dump(env_dict, f, indent=4, sort_keys=True)
    echo("Configuration successfully exported to: ", nl=False)
    echo('"%s"' % output_path, fg="cyan")

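# NOTE: `get_env` and `persist_env` are the read/write counterparts used by the
# `aws` and `eks` commands above but are not defined in this file. A minimal
# sketch, assuming configurations are persisted as pretty-printed JSON at the
# path returned by `get_config_path` (sketched earlier):
def get_env(profile):
    # Return the existing configuration for `profile`, or {} if none exists.
    path = get_config_path(profile)
    if os.path.exists(path):
        with open(path, "r") as f:
            return json.load(f)
    return {}


def persist_env(env_dict, profile):
    # Write the configuration, creating the config directory if needed.
    path = get_config_path(profile)
    makedirs(os.path.dirname(path))
    with open(path, "w") as f:
        json.dump(env_dict, f, indent=4, sort_keys=True)
    echo("\nConfiguration successfully written to: ", nl=False)
    echo('"%s"' % path, fg="cyan")
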
def configure_aws_batch(existing_env):
    empty_profile = False
    if not existing_env:
        empty_profile = True
    env = {}

    # Set AWS Batch Job Queue.
    env["METAFLOW_BATCH_JOB_QUEUE"] = click.prompt(
        cyan("[METAFLOW_BATCH_JOB_QUEUE]") + " AWS Batch Job Queue.",
        default=existing_env.get("METAFLOW_BATCH_JOB_QUEUE"),
        show_default=True,
    )
    # Set IAM role for AWS Batch jobs to assume.
    env["METAFLOW_ECS_S3_ACCESS_IAM_ROLE"] = click.prompt(
        cyan("[METAFLOW_ECS_S3_ACCESS_IAM_ROLE]")
        + " IAM role for AWS Batch jobs to access AWS "
        + "resources (Amazon S3 etc.).",
        default=existing_env.get("METAFLOW_ECS_S3_ACCESS_IAM_ROLE"),
        show_default=True,
    )
    # Set default Docker repository for AWS Batch jobs.
    env["METAFLOW_BATCH_CONTAINER_REGISTRY"] = click.prompt(
        cyan("[METAFLOW_BATCH_CONTAINER_REGISTRY]")
        + yellow(" (optional)")
        + " Default Docker image repository for AWS "
        + "Batch jobs. If nothing is specified, "
        + "dockerhub (hub.docker.com/) is "
        + "used as default.",
        default=existing_env.get("METAFLOW_BATCH_CONTAINER_REGISTRY", ""),
        show_default=True,
    )
    # Set default Docker image for AWS Batch jobs.
    env["METAFLOW_BATCH_CONTAINER_IMAGE"] = click.prompt(
        cyan("[METAFLOW_BATCH_CONTAINER_IMAGE]")
        + yellow(" (optional)")
        + " Default Docker image for AWS Batch jobs. "
        + "If nothing is specified, an appropriate "
        + "python image is used as default.",
        default=existing_env.get("METAFLOW_BATCH_CONTAINER_IMAGE", ""),
        show_default=True,
    )

    # Configure AWS Step Functions for scheduling.
    if click.confirm(
        "\nMetaflow can "
        + yellow("schedule your flows on AWS Step Functions")
        + " and trigger them at a specific cadence using "
        "Amazon EventBridge.\nTo support flows involving "
        "foreach steps, you would need access to AWS "
        "DynamoDB.\nWould you like to configure AWS Step "
        "Functions for scheduling?",
        default=empty_profile or "METAFLOW_SFN_IAM_ROLE" in existing_env,
        abort=False,
    ):
        # Configure IAM role for AWS Step Functions.
        env["METAFLOW_SFN_IAM_ROLE"] = click.prompt(
            cyan("[METAFLOW_SFN_IAM_ROLE]")
            + " IAM role for AWS Step Functions to "
            + "access AWS resources (AWS Batch, "
            + "AWS DynamoDB).",
            default=existing_env.get("METAFLOW_SFN_IAM_ROLE"),
            show_default=True,
        )
        # Configure IAM role for Amazon EventBridge.
        env["METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE"] = click.prompt(
            cyan("[METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE]")
            + " IAM role for Amazon EventBridge to "
            + "access AWS Step Functions.",
            default=existing_env.get("METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE"),
            show_default=True,
        )
        # Configure AWS DynamoDB table for AWS Step Functions.
        env["METAFLOW_SFN_DYNAMO_DB_TABLE"] = click.prompt(
            cyan("[METAFLOW_SFN_DYNAMO_DB_TABLE]")
            + " AWS DynamoDB table name for tracking "
            + "AWS Step Functions execution metadata.",
            default=existing_env.get("METAFLOW_SFN_DYNAMO_DB_TABLE"),
            show_default=True,
        )
    return env

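# NOTE: the `echo`, `yellow`, `cyan`, and `red` helpers used throughout this
# file are assumed to be thin wrappers over click's styling utilities. A sketch
# of what they might look like (exact styling choices are assumptions):
def echo(message, nl=True, **kwargs):
    # click.secho accepts styling kwargs such as fg= and bold=.
    click.secho(message, nl=nl, **kwargs)


def yellow(message):
    return click.style(message, fg="yellow")


def cyan(message):
    return click.style(message, fg="cyan")


def red(message):
    return click.style(message, fg="red")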