Example #1
def eks(ctx, profile):

    check_kubernetes_client(ctx)

    # Greet the user!
    echo(
        "Welcome to Metaflow! Follow the prompts to configure your "
        "installation.\n",
        bold=True,
    )

    check_kubernetes_config(ctx)

    # Check for existing configuration.
    if not overwrite_config(profile):
        ctx.abort()

    verify_aws_credentials(ctx)

    existing_env = get_env(profile)

    env = existing_env.copy()

    if existing_env.get("METAFLOW_DEFAULT_DATASTORE") == "s3":
        # Skip S3 configuration if it is already configured
        pass
    elif not existing_env.get("METAFLOW_DEFAULT_DATASTORE"):
        env.update(configure_s3_datastore(existing_env))
    else:
        # If configured to use something else, offer to switch to S3
        click.confirm(
            "\nMetaflow on EKS needs to use S3 as a datastore, " +
            "but your existing configuration is not using S3. " +
            "Would you like to reconfigure it to use S3?",
            default=True,
            abort=True,
        )
        env.update(configure_s3_datastore(existing_env))

    # Configure remote metadata.
    if existing_env.get("METAFLOW_DEFAULT_METADATA") == "service":
        # Skip metadata service configuration if it is already configured
        pass
    else:
        if click.confirm(
                "\nMetaflow can use a " +
                yellow("remote Metadata Service to track") +
                " and persist flow execution metadata. \nWould you like to "
                "configure the Metadata Service?",
                default=True,
                abort=False,
        ):
            env.update(configure_metadata_service(existing_env))

    # Configure AWS EKS for compute.
    env.update(configure_eks(existing_env))

    persist_env({k: v for k, v in env.items() if v}, profile)
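These listings show only the command bodies; the Click decorators that register them as CLI commands are omitted. A minimal sketch of how a command like eks might be wired up, assuming a hypothetical `configure` command group and option names (not necessarily Metaflow's actual decorators):

import click

@click.group()
def configure():
    """Commands for configuring Metaflow."""

# Hypothetical registration; the group name, help text, and --profile
# option are assumptions for illustration.
@configure.command(help="Configure Metaflow to run on EKS.")
@click.option("--profile", default="", help="Optional named configuration profile.")
@click.pass_context
def eks(ctx, profile):
    ...  # body as in Example #1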
Example #2
def pull(episode):
    tutorials_dir = get_tutorials_dir()
    if not episode:
        episodes = get_all_episodes()
    else:
        # Validate the requested episode.
        validate_episode(episode)
        episodes = [episode]
    # Create destination `metaflow-tutorials` dir.
    dst_parent = os.path.join(os.getcwd(), "metaflow-tutorials")
    makedirs(dst_parent)

    # Pull specified episodes.
    for episode in episodes:
        dst_dir = os.path.join(dst_parent, episode)
        # Check if episode has already been pulled before.
        if os.path.exists(dst_dir):
            if click.confirm("Episode " +
                             click.style('"{0}"'.format(episode), fg="red") +
                             " has already been pulled before. Do you wish "
                             "to delete the existing version?"):
                shutil.rmtree(dst_dir)
            else:
                continue
        echo("Pulling episode ", nl=False)
        echo('"{0}"'.format(episode), fg="cyan", nl=False)
        echo(" into your current working directory.")
        # Copy from (local) metaflow package dir to current.
        src_dir = os.path.join(tutorials_dir, episode)
        shutil.copytree(src_dir, dst_dir)

    echo("\nTo know more about an episode, type:\n", nl=False)
    echo("metaflow tutorials info [EPISODE]", fg="cyan")
Example #3
def reset(profile):
    check_for_missing_profile(profile)
    path = get_config_path(profile)
    if os.path.exists(path):
        click.confirm(
            "Do you really wish to reset the configuration in " +
            click.style('"%s"' % path, fg="cyan") + "?",
            abort=True,
        )
        os.remove(path)
        echo("Configuration successfully reset to run locally.")
    else:
        echo("Configuration is already reset to run locally.")
Example #4
def check_kubernetes_config(ctx):
    from kubernetes import config

    try:
        all_contexts, current_context = config.list_kube_config_contexts()
        click.confirm(
            "You have a valid kubernetes configuration. The current context is set to "
            + yellow(current_context["name"]) + " " + "Proceed?",
            default=True,
            abort=True,
        )
    except config.config_exception.ConfigException as e:
        click.confirm(
            "\nYou don't seem to have a valid kubernetes configuration file. "
            + "The error from kubernetes client library: " + red(str(e)) +
            "." +
            "To create a kubernetes configuration for EKS, you typically need to run "
            + yellow("aws eks update-kubeconfig --name <CLUSTER NAME>") +
            ". For further details, refer to AWS Documentation at https://docs.aws.amazon.com/eks/latest/userguide/create-kubeconfig.html\n"
            "Do you want to proceed with configuring Metaflow for EKS anyway?",
            default=False,
            abort=True,
        )
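The bare color helpers used throughout these examples (cyan, yellow, red) do not appear in the listing. They are presumably thin wrappers over click.style; a minimal sketch under that assumption:

import click

def cyan(string):
    # Wraps the string in ANSI cyan for terminal output.
    return click.style(string, fg="cyan")

def yellow(string):
    return click.style(string, fg="yellow")

def red(string):
    return click.style(string, fg="red")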
Example #5
def configure_datastore_and_metadata(existing_env):
    empty_profile = not existing_env
    env = {}

    # Configure Amazon S3 as the datastore.
    use_s3_as_datastore = click.confirm(
        "\nMetaflow can use " + yellow("Amazon S3 as the storage backend") +
        " for all code and data artifacts on " +
        "AWS.\nAmazon S3 is a strict requirement if you " +
        "intend to execute your flows on AWS Batch " +
        "and/or schedule them on AWS Step " +
        "Functions.\nWould you like to configure Amazon " +
        "S3 as the default storage backend?",
        default=empty_profile
        or existing_env.get("METAFLOW_DEFAULT_DATASTORE", "") == "s3",
        abort=False,
    )
    if use_s3_as_datastore:
        env.update(configure_s3_datastore(existing_env))

    # Configure Metadata service for tracking.
    if click.confirm(
            "\nMetaflow can use a " +
            yellow("remote Metadata Service to track") +
            " and persist flow execution metadata.\nConfiguring the "
            "service is a requirement if you intend to schedule your "
            "flows with AWS Step Functions.\nWould you like to "
            "configure the Metadata Service?",
            default=empty_profile
            or existing_env.get("METAFLOW_DEFAULT_METADATA", "") == "service"
            or "METAFLOW_SFN_IAM_ROLE" in env,
            abort=False,
    ):
        env.update(configure_metadata_service(existing_env))
    return env
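configure_s3_datastore is referenced here and in Example #1 but is not included in the listing. A hedged sketch of what it likely does, prompting for the S3 root and marking S3 as the default datastore; the prompt text and the exact key names are assumptions:

import click

def configure_s3_datastore(existing_env):
    env = {}
    # Ask for the Amazon S3 folder that will hold code and artifacts.
    # The variable name mirrors the METAFLOW_* convention but is assumed.
    env["METAFLOW_DATASTORE_SYSROOT_S3"] = click.prompt(
        cyan("[METAFLOW_DATASTORE_SYSROOT_S3]") +
        " Amazon S3 folder for Metaflow artifact storage "
        "(s3://<bucket>/<prefix>).",
        default=existing_env.get("METAFLOW_DATASTORE_SYSROOT_S3"),
        show_default=True,
    )
    env["METAFLOW_DEFAULT_DATASTORE"] = "s3"
    return env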
Example #6
def overwrite_config(profile):
    path = get_config_path(profile)
    if os.path.exists(path):
        if not click.confirm(
                click.style(
                    "We found an existing configuration for your " +
                    "profile. Do you want to modify the existing " +
                    "configuration?",
                    fg="red",
                    bold=True,
                )):
            echo(
                "You can configure a different named profile by using the "
                "--profile argument. You can activate this profile by setting "
                "the environment variable METAFLOW_PROFILE to the named "
                "profile.",
                fg="yellow",
            )
            return False
    return True
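get_config_path maps a profile name to a configuration file on disk. A plausible implementation, assuming JSON files live under METAFLOW_HOME (defaulting to ~/.metaflowconfig) and that named profiles use a config_<profile>.json filename; both details are assumptions about the on-disk layout:

import os
from os.path import expanduser

def get_config_path(profile):
    # The default profile uses config.json; named profiles get a suffix.
    config_file = "config.json" if not profile else ("config_%s.json" % profile)
    home = os.environ.get("METAFLOW_HOME", "~/.metaflowconfig")
    return os.path.join(expanduser(home), config_file)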
Example #7
def verify_aws_credentials(ctx):
    # Verify that the user has configured AWS credentials on their computer.
    if not click.confirm(
            "\nMetaflow relies on " + yellow("AWS access credentials") +
            " present on your computer to access resources on AWS."
            "\nBefore proceeding further, please confirm that you "
            "have already configured these access credentials on "
            "this computer.",
            default=True,
    ):
        echo(
            "There are many ways to setup your AWS access credentials. You "
            "can get started by following this guide: ",
            nl=False,
            fg="yellow",
        )
        echo(
            "https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html",
            fg="cyan",
        )
        ctx.abort()
Example #8
def aws(ctx, profile):

    # Greet the user!
    echo(
        "Welcome to Metaflow! Follow the prompts to configure your "
        "installation.\n",
        bold=True,
    )

    # Check for existing configuration.
    if not overwrite_config(profile):
        ctx.abort()

    verify_aws_credentials(ctx)

    existing_env = get_env(profile)
    empty_profile = not existing_env

    env = {}
    env.update(configure_datastore_and_metadata(existing_env))

    # Configure AWS Batch for compute if using S3
    if env.get("METAFLOW_DEFAULT_DATASTORE") == "s3":
        if click.confirm(
                "\nMetaflow can scale your flows by " +
                yellow("executing your steps on AWS Batch") +
                ".\nAWS Batch is a strict requirement if you intend "
                "to schedule your flows on AWS Step Functions.\nWould "
                "you like to configure AWS Batch as your compute "
                "backend?",
                default=empty_profile
                or "METAFLOW_BATCH_JOB_QUEUE" in existing_env,
                abort=False,
        ):
            env.update(configure_aws_batch(existing_env))

    persist_env({k: v for k, v in env.items() if v}, profile)
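persist_env is the write-side counterpart of export in Example #9 below: it saves the collected key-value pairs to the profile's config file. A minimal sketch, assuming the JSON layout shown in Example #9 and the get_config_path sketch above:

import json
import os

def persist_env(env_dict, profile):
    # Resolve the profile's config path and make sure the directory exists.
    path = get_config_path(profile)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w") as f:
        json.dump(env_dict, f, indent=4, sort_keys=True)
    echo("\nConfiguration successfully written to: ", nl=False)
    echo('"%s"' % path, fg="cyan")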
Example #9
def export(profile, output_filename):
    check_for_missing_profile(profile)
    # Read the profile's configuration and export its contents to a new file.
    path = get_config_path(profile)
    env_dict = {}
    if os.path.exists(path):
        with open(path, "r") as f:
            env_dict = json.load(f)
    # resolve_path doesn't expand `~` in `path`.
    output_path = expanduser(output_filename)
    if os.path.exists(output_path):
        click.confirm(
            "Do you wish to overwrite the contents in " +
            click.style('"%s"' % output_path, fg="cyan") + "?",
            abort=True,
        )
    # Write to file.
    with open(output_path, "w") as f:
        json.dump(env_dict, f, indent=4, sort_keys=True)
    echo("Configuration successfully exported to: ", nl=False)
    echo('"%s"' % output_path, fg="cyan")
Example #10
def configure_aws_batch(existing_env):
    empty_profile = not existing_env
    env = {}

    # Set AWS Batch Job Queue.
    env["METAFLOW_BATCH_JOB_QUEUE"] = click.prompt(
        cyan("[METAFLOW_BATCH_JOB_QUEUE]") + " AWS Batch Job Queue.",
        default=existing_env.get("METAFLOW_BATCH_JOB_QUEUE"),
        show_default=True,
    )
    # Set IAM role for AWS Batch jobs to assume.
    env["METAFLOW_ECS_S3_ACCESS_IAM_ROLE"] = click.prompt(
        cyan("[METAFLOW_ECS_S3_ACCESS_IAM_ROLE]") +
        " IAM role for AWS Batch jobs to access AWS " +
        "resources (Amazon S3 etc.).",
        default=existing_env.get("METAFLOW_ECS_S3_ACCESS_IAM_ROLE"),
        show_default=True,
    )
    # Set default Docker repository for AWS Batch jobs.
    env["METAFLOW_BATCH_CONTAINER_REGISTRY"] = click.prompt(
        cyan("[METAFLOW_BATCH_CONTAINER_REGISTRY]") + yellow(" (optional)") +
        " Default Docker image repository for AWS " +
        "Batch jobs. If nothing is specified, " +
        "dockerhub (hub.docker.com/) is " + "used as default.",
        default=existing_env.get("METAFLOW_BATCH_CONTAINER_REGISTRY", ""),
        show_default=True,
    )
    # Set default Docker image for AWS Batch jobs.
    env["METAFLOW_BATCH_CONTAINER_IMAGE"] = click.prompt(
        cyan("[METAFLOW_BATCH_CONTAINER_IMAGE]") + yellow(" (optional)") +
        " Default Docker image for AWS Batch jobs. " +
        "If nothing is specified, an appropriate " +
        "python image is used as default.",
        default=existing_env.get("METAFLOW_BATCH_CONTAINER_IMAGE", ""),
        show_default=True,
    )

    # Configure AWS Step Functions for scheduling.
    if click.confirm(
            "\nMetaflow can " + yellow("schedule your flows on AWS Step "
                                       "Functions") +
            " and trigger them at a specific cadence using "
            "Amazon EventBridge.\nTo support flows involving "
            "foreach steps, you would need access to AWS "
            "DynamoDB.\nWould you like to configure AWS Step "
            "Functions for scheduling?",
            default=empty_profile or "METAFLOW_SFN_IAM_ROLE" in existing_env,
            abort=False,
    ):
        # Configure IAM role for AWS Step Functions.
        env["METAFLOW_SFN_IAM_ROLE"] = click.prompt(
            cyan("[METAFLOW_SFN_IAM_ROLE]") +
            " IAM role for AWS Step Functions to " +
            "access AWS resources (AWS Batch, " + "AWS DynamoDB).",
            default=existing_env.get("METAFLOW_SFN_IAM_ROLE"),
            show_default=True,
        )
        # Configure IAM role for AWS Events Bridge.
        env["METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE"] = click.prompt(
            cyan("[METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE]") +
            " IAM role for Amazon EventBridge to " +
            "access AWS Step Functions.",
            default=existing_env.get("METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE"),
            show_default=True,
        )
        # Configure AWS DynamoDB Table for AWS Step Functions.
        env["METAFLOW_SFN_DYNAMO_DB_TABLE"] = click.prompt(
            cyan("[METAFLOW_SFN_DYNAMO_DB_TABLE]") +
            " AWS DynamoDB table name for tracking " +
            "AWS Step Functions execution metadata.",
            default=existing_env.get("METAFLOW_SFN_DYNAMO_DB_TABLE"),
            show_default=True,
        )
    return env