def describe_runs(n=0, notebook=None, rule=None, session=None):
    """Returns a generator of descriptions for all the notebook runs. See :meth:`describe_run` for details of
    the description.

    Args:
       n (int): The number of runs to return or all runs if 0 (default: 0)
       notebook (str): If not None, return only runs of this notebook (default: None)
       rule (str): If not None, return only runs invoked by this rule (default: None)
       session (boto3.Session): The boto3 session to use. Will create a default session if not supplied (default: None).
    """
    session = ensure_session(session)
    client = session.client("sagemaker")
    paginator = client.get_paginator("list_processing_jobs")
    page_iterator = paginator.paginate(NameContains="papermill-")

    for page in page_iterator:
        for item in page["ProcessingJobSummaries"]:
            job_name = item["ProcessingJobName"]
            if not job_name.startswith("papermill-"):
                continue
            d = describe_run(job_name, session)

            if notebook is not None and notebook != d["Notebook"]:
                continue
            if rule is not None and rule != d["Rule"]:
                continue
            yield d

            if n > 0:
                n = n - 1
                if n == 0:
                    return
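
# Usage sketch: list recent runs of one notebook and print a one-line summary for
# each (the notebook filename below is hypothetical).
def example_list_recent_runs():
    for run in describe_runs(n=5, notebook="mynotebook.ipynb"):
        print(run["Job"], run["Status"], run["Result"])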
def wait_for_complete(job_name, progress=True, sleep_time=10, session=None):
    """Wait for a notebook execution job to complete.

    Args:
      job_name (str):
        The name of the SageMaker Processing Job executing the notebook. (Required)
      progress (boolean):
        If True, print a period after every poll attempt. (Default: True)
      sleep_time (int):
        The number of seconds between polls. (Default: 10)
      session (boto3.Session):
        A boto3 session to use. Will create a default session if not supplied. (Default: None)

    Returns:
      A tuple with the job status and the failure message if any.
    """

    session = ensure_session(session)
    client = session.client("sagemaker")
    done = False
    while not done:
        if progress:
            print(".", end="")
        desc = client.describe_processing_job(ProcessingJobName=job_name)
        status = desc["ProcessingJobStatus"]
        if status != "InProgress":
            done = True
        else:
            time.sleep(sleep_time)
    if progress:
        print()
    return status, desc.get("ExitMessage")
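
# Usage sketch: block until a known job finishes and report the outcome
# (the job name below is hypothetical).
def example_wait(job_name="papermill-mynotebook-2020-10-21-20-00-11"):
    status, failure = wait_for_complete(job_name, progress=True, sleep_time=30)
    if status != "Completed":
        print(f"Job failed: {failure}")
    return status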
def upload_notebook(notebook, session=None):
    """Uploads a notebook to S3 in the default SageMaker Python SDK bucket for
    this user. The resulting S3 object will be named "s3://<bucket>/papermill-input/notebook-YYYY-MM-DD-hh-mm-ss.ipynb".

    Args:
      notebook (str):
        The filename of the notebook you want to upload. (Required)
      session (boto3.Session):
        A boto3 session to use. Will create a default session if not supplied. (Default: None)

    Returns:
      The S3 prefix containing the uploaded files, in URI format.
    """
    session = ensure_session(session)
    s3 = session.client("s3")
    bucket = default_bucket(session)
    prefix = f"papermill_input/{time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())}"

    directory, nb_filename = os.path.split(notebook)
    # An empty directory component means the notebook lives in the current directory.
    for root, dirs, files in os.walk(directory or ".", followlinks=True):
        for filename in files:
            local_path = os.path.join(root, filename)
            relative_path = os.path.relpath(local_path, directory or ".")
            s3_path = os.path.join(prefix, relative_path)
            try:
                # Skip the upload if an object with this key already exists
                # (botocore import assumed, as used elsewhere in these examples).
                s3.head_object(Bucket=bucket, Key=s3_path)
            except botocore.exceptions.ClientError:
                s3.upload_file(local_path, bucket, s3_path)

    return f"s3://{bucket}/{prefix}/"
    def __init__(self, max_jobs=20, session=None, log=None):
        self.session = ensure_session(session)
        self.client = self.session.client("sagemaker")
        self.log = log or logging.getLogger(__name__)
        self.max_jobs = max_jobs

        self.new_jobs = NewJobs(self.client)
        self.run_list = []
        self.in_progress = {}
def stop_run(job_name, session=None):
    """Stop the named processing job

    Args:
       job_name (string): The name of the job to stop
       session (boto3.Session): The boto3 session to use. Will create a default session if not supplied (default: None)."""
    session = ensure_session(session)
    client = session.client("sagemaker")
    client.stop_processing_job(ProcessingJobName=job_name)
def save_csv_to_s3(df, csv_name):
    session = ensure_session()

    df.to_csv(csv_name, index=False)

    s3 = session.client("s3")
    bucket = default_bucket(session)
    prefix = "full_repo_scan"

    s3_path = os.path.join(prefix, csv_name)
    s3.upload_file(csv_name, bucket, s3_path)

    return f"s3://{bucket}/{prefix}/{csv_name}"
def describe(job_name, session):
    """Get the status and exit message for a Processing job.

    Args:
        job_name (str): The name of the SageMaker Processing Job to describe.
        session (boto3.Session): The boto3 session to use. Will create a default session if not supplied.

    Returns:
        (str, str): A tuple with the status and the exit message.

    """
    session = ensure_session(session)
    client = session.client("sagemaker")
    response = client.describe_processing_job(ProcessingJobName=job_name)
    return response["ProcessingJobStatus"], response.get("ExitMessage")
def run_notebook(
    image,
    notebook,
    parameters={},
    role=None,
    instance_type="ml.m5.large",
    output_prefix=None,
    output=".",
    session=None,
):
    """Run a notebook in SageMaker Processing producing a new output notebook.

    Args:
        image (str): The ECR image that defines the environment to run the job (required).
        notebook (str): The local notebook to upload and run (required).
        parameters (dict): The dictionary of parameters to pass to the notebook (default: {}).
        role (str): The name of a role to use to run the notebook (default: calls get_execution_role()).
        instance_type (str): The SageMaker instance to use for executing the job (default: ml.m5.large).
        output_prefix (str): The prefix path in S3 for where to store the output notebook
                             (default: determined based on SageMaker Python SDK)
        output (str): The directory to copy the output file to (default: the current working directory).
        session (boto3.Session): The boto3 session to use. Will create a default session if not supplied (default: None).

    Returns:
        A tuple with the processing job name, the job status, the path to the result notebook (or None if the job
        did not complete), and the failure reason (or None). The output notebook name is formed by adding a
        timestamp to the original notebook name.
    """
    session = ensure_session(session)
    if output_prefix is None:
        output_prefix = get_output_prefix()
    s3path = upload_notebook(notebook, session)
    job_name = execute_notebook(
        image=image,
        input_path=s3path,
        output_prefix=output_prefix,
        notebook=notebook,
        parameters=parameters,
        role=role,
        instance_type=instance_type,
        session=session,
    )
    print(f"Job {job_name} started")
    status, failure_reason = wait_for_complete(job_name, session=session)
    if status == "Completed":
        local = download_notebook(job_name, output=output)
    else:
        local = None
    return (job_name, status, local, failure_reason)
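
# Usage sketch: run a notebook end to end on SageMaker Processing and report where
# the output landed. The image name, notebook filename, and parameter key are
# hypothetical, and a default execution role is assumed to be resolvable.
def example_run():
    job, status, local, failure = run_notebook(
        image="notebook-runner",
        notebook="mynotebook.ipynb",
        parameters={"region": "us-west-2"},
        instance_type="ml.m5.xlarge",
    )
    if status == "Completed":
        print(f"Output notebook downloaded to {local}")
    else:
        print(f"Job {job} ended with status {status}: {failure}")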
def get_output_notebook(job_name, session=None):
    """Get the name and S3 uri for an output notebook from a previously completed job.

    Args:
      job_name (str): The name of the SageMaker Processing Job that executed the notebook. (Required)
      session (boto3.Session):
        A boto3 session to use. Will create a default session if not supplied. (Default: None)

    Returns:
        (str, str): A tuple with the notebook name and S3 uri to the output notebook.
    """
    session = ensure_session(session)
    client = session.client("sagemaker")
    desc = client.describe_processing_job(ProcessingJobName=job_name)

    prefix = desc["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
    notebook = os.path.basename(desc["Environment"]["PAPERMILL_OUTPUT"])
    return notebook, f"{prefix}/{notebook}"
def download_all(lis, output=".", session=None):
    """Download each of the output notebooks from a list previously completed jobs.

    Args:
      lis (list, pandas.Series, or pandas.DataFrame): A list of jobs or a pandas DataFrame with a "Job" column (as returned by :meth:`list_runs`). (Required)
      output (str): The directory to copy the output files to. (Default: the current working directory)
      session (boto3.Session):
        A boto3 session to use. Will create a default session if not supplied. (Default: None)

    Returns:
      The list of the filenames of the downloaded notebooks.
    """
    import pandas as pd  # pylint: disable=import-error

    if isinstance(lis, pd.DataFrame):
        lis = list(lis["Job"])
    elif isinstance(lis, pd.Series):
        lis = list(lis)

    session = ensure_session(session)
    return [download_notebook(job, output, session) for job in lis]
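
# Usage sketch: download the outputs of the ten most recent runs into a local
# directory, feeding describe_runs() into download_all() (pandas must be installed,
# since download_all imports it).
def example_download_recent(output_dir="."):
    jobs = [run["Job"] for run in describe_runs(n=10)]
    return download_all(jobs, output=output_dir)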
def main():
    args = parse_args(sys.argv[1:])
    skip_args = {
        "docker": args.skip_docker,
        "local_mode": args.skip_local,
        "fsx_esx": args.skip_filesystem
    }

    notebook_names = parse.all_notebook_filenames()
    job_names = []
    kernel_names = []
    session = ensure_session()
    instance_type = args.instance or "ml.m5.xlarge"
    for notebook in notebook_names:
        if parse.is_notebook_skipped(notebook, skip_args):
            job_name = None
        else:
            image = kernels.kernel_image_for(notebook)
            s3path = upload_notebook(notebook, session)
            parameters = {"kms_key": kms_key()}
            job_name = execute_notebook(
                image=image,
                input_path=s3path,
                notebook=notebook,
                instance_type=instance_type,
                session=session,
                output_prefix=get_output_prefix(),
                parameters=parameters,
            )
            time.sleep(1)

        print(job_name)
        job_names.append(str(job_name))
        kernel_names.append(kernels.kernel_type_for(notebook))

    print("\n" * 2)
    print("-" * 100)
    print("\n" * 2)
    print(save_csv_to_s3(notebook_names, job_names, kernel_names))
def save_csv_to_s3(notebooks, job_names, kernels):
    session = ensure_session()

    df = pd.DataFrame({
        "filename": notebooks,
        "processing-job-name": job_names,
        "kernel": kernels
    })

    csv_name = f"{time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())}.csv"
    df.to_csv(csv_name, index=False)

    s3 = session.client("s3")
    bucket = default_bucket(session)
    prefix = "full_repo_scan"

    s3_path = os.path.join(prefix, csv_name)
    try:
        # Skip the upload if an object with this key already exists
        # (botocore import assumed, as used elsewhere in these examples).
        s3.head_object(Bucket=bucket, Key=s3_path)
    except botocore.exceptions.ClientError:
        s3.upload_file(csv_name, bucket, s3_path)

    return f"s3://{bucket}/{prefix}/{csv_name}"
def upload_fileobj(notebook_fileobj, session=None):
    """Uploads a file object to S3 in the default SageMaker Python SDK bucket for
    this user. The resulting S3 object will be named "s3://<bucket>/papermill_input/notebook-YYYY-MM-DD-hh-mm-ss.ipynb".

    Args:
      notebook_fileobj (fileobj):
        A file object (as returned from open) that is reading from the notebook you want to upload. (Required)
      session (boto3.Session):
        A boto3 session to use. Will create a default session if not supplied. (Default: None)

    Returns:
      The resulting object name in S3 in URI format.
    """

    session = ensure_session(session)
    snotebook = f"notebook-{time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())}.ipynb"

    s3 = session.client("s3")
    key = "papermill_input/" + snotebook
    bucket = default_bucket(session)
    s3path = f"s3://{bucket}/{key}"
    s3.upload_fileobj(notebook_fileobj, bucket, key)

    return s3path
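
# Usage sketch: upload a notebook that is already open as a binary file object
# (the local path below is hypothetical).
def example_upload_fileobj(path="mynotebook.ipynb"):
    with open(path, "rb") as f:
        return upload_fileobj(f)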
def main():
    args = parse_args(sys.argv[1:])

    session = ensure_session()

    csv_filename = args.csv
    df = pd.read_csv(csv_filename, index_col=False)

    output_notebooks = []
    runtimes = []
    statuses = []
    errors = []
    dates = []

    sagemaker = session.client("sagemaker")
    for index, row in df.iterrows():
        job_name = row["processing-job-name"]
        if job_name == "None":
            uri = "None"
            runtime = 0
            status = "Skipped"
            error = "UsesDocker"
            date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        else:
            response = sagemaker.describe_processing_job(ProcessingJobName=job_name)
            notebook, uri = get_output_notebook(job_name, session)
            runtime = (
                response.get("ProcessingEndTime", datetime.now(timezone.utc))
                - response.get("ProcessingStartTime", datetime.now(timezone.utc))
            ).total_seconds()
            status = response.get("ProcessingJobStatus")
            date = response.get("ProcessingEndTime", datetime.now(timezone.utc)).strftime(
                "%Y-%m-%d"
            )

            error = response.get("ExitMessage")
            if error == "Kernel died":
                error = "KernelDied"
            elif error:
                lines = error.splitlines()
                error_message = lines[-1]
                # partition() never raises, even when the message has no colon.
                error_type, _, error_details = error_message.partition(":")
                error = error_type or "Uncategorized"

        output_notebooks.append(uri)
        runtimes.append(runtime)
        statuses.append(status)
        errors.append(error)
        dates.append(date)

        print(job_name)
        time.sleep(1)

    df["output"] = output_notebooks
    df["runtime"] = runtimes
    df["status"] = statuses
    df["error"] = errors

    df.insert(loc=0, column="date", value=dates)

    print("\n" * 2)
    print("-" * 100)
    print("\n" * 2)
    print(save_csv_to_s3(df, csv_filename))
def main():
    args = parse_args(sys.argv[1:])

    session = ensure_session()

    csv_filename = args.csv
    dataframe = pd.read_csv(csv_filename, index_col=False)

    output_notebooks = []
    runtimes = []
    statuses = []
    errors = []
    dates = []
    error_details = []

    sagemaker = session.client("sagemaker")
    for index, row in dataframe.iterrows():
        job_name = row["processing-job-name"]
        detail = None
        if job_name == "None":
            uri = "None"
            runtime = 0
            status = "Skipped"
            error = None
            date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        else:
            response = sagemaker.describe_processing_job(ProcessingJobName=job_name)
            date = response.get("ProcessingEndTime", datetime.now(timezone.utc)).strftime(
                "%Y-%m-%d"
            )
            notebook, uri = get_output_notebook(job_name, session)
            status = response.get("ProcessingJobStatus")

            runtime = (
                response.get("ProcessingEndTime", datetime.now(timezone.utc))
                - response.get("ProcessingStartTime", datetime.now(timezone.utc))
            ).total_seconds()
            if runtime < 0:
                runtime = 0

            error = response.get("ExitMessage")
            if error == "Kernel died":
                error = "KernelDied"
                detail = "kernel died"
            elif error:
                found_error_type = False
                valid_error_types = ("Exception:", "Error:", "InvalidArn:",
                                     "NotFound:", "InUse:")
                lines = error.splitlines()
                for line in reversed(lines):
                    if any(error_type in line
                           for error_type in valid_error_types):
                        error_parsed = line.split(":", 1)
                        print(
                            "The following error was encountered while executing the notebook"
                        )
                        print(line)
                        error = error_parsed[0]
                        detail = error_parsed[1]
                        found_error_type = True
                        break
                if not found_error_type:
                    error = "Uncategorized"

            if status == "Stopped":
                error = "TimedOut"
                detail = "Notebook execution timed out"

        output_notebooks.append(uri)
        runtimes.append(runtime)
        statuses.append(status)
        errors.append(error)
        dates.append(date)
        error_details.append(detail)

        print(job_name)
        time.sleep(1)

    new_dataframe = pd.DataFrame({
        "date": dates,
        "filename": dataframe["filename"],
        "processing-job-name": dataframe["processing-job-name"],
        "kernel": dataframe["kernel"],
        "output": output_notebooks,
        "runtime": runtimes,
        "status": statuses,
        "error": errors,
        "error_detail": error_details,
    })

    print("\n" * 2)
    print("-" * 100)
    print("\n" * 2)
    print(save_csv_to_s3(new_dataframe, csv_filename))
def main():
    args = parse_args(sys.argv[1:])
    skip_args = {
        "docker": args.skip_docker,
        "local_mode": args.skip_local,
        "fsx_esx": args.skip_filesystem
    }
    jobs = {}
    session = ensure_session()
    instance_type = args.instance or "ml.m5.xlarge"
    for notebook in parse.pr_notebook_filenames(args.pr):
        if parse.is_notebook_skipped(notebook, skip_args):
            job_name = None
        else:
            image = kernels.kernel_image_for(notebook)
            s3path = upload_notebook(notebook, session)
            parameters = {"kms_key": kms_key()}
            job_name = execute_notebook(
                image=image,
                input_path=s3path,
                notebook=notebook,
                role="SageMakerRole",
                instance_type=instance_type,
                session=session,
                output_prefix=get_output_prefix(),
                parameters=parameters,
            )
            time.sleep(1)

        jobs[notebook] = job_name

    failures = {}

    while jobs:
        for notebook in list(jobs):
            job_name = jobs[notebook]
            if not is_running(job_name, session):
                if job_name:
                    status, failure_reason = wait_for_complete(
                        job_name, progress=False, session=session
                    )
                else:
                    status, failure_reason = (
                        "Skipped",
                        "This notebook was skipped because it either uses Docker or Local Mode.",
                    )

                basename = os.path.basename(notebook)
                print("\n" * 2)
                print(f"* {basename} " + "*" * (97 - len(basename)))
                print("*")
                print(f"* {'job name':>11}: {str(job_name):<11}")
                print("*")
                print(f"* {'kernel':>11}: {kernels.kernel_type_for(notebook):<11}")
                print("*")
                print(f"* {'status':>11}: {status:<11}")
                print("*")
                if status != "Completed":
                    print(failure_reason)
                    if status != "Skipped":
                        failures[notebook] = failure_reason
                jobs.pop(notebook)
            time.sleep(10)

    print("\n" * 2)
    print("-" * 100)
    if failures:
        raise Exception(
            "One or more notebooks failed to execute. Please see above for error messages. "
            "If you need more information, please see the CloudWatch logs for the corresponding Processing job."
        )
def describe_run(job_name, session=None):
    """Describe a particular notebook run.

    Args:
      job_name (str): The name of the processing job that ran the notebook.
      session (boto3.Session): The boto3 session to use. Will create a default session if not supplied (default: None).

    Returns:
      A dictionary with keys for each element of the job description. For example::

      {'Notebook': 'scala-spark-test.ipynb',
       'Rule': '',
       'Parameters': '{"input": "s3://notebook-testing/const.txt"}',
       'Job': 'papermill-scala-spark-test-2020-10-21-20-00-11',
       'Status': 'Completed',
       'Failure': None,
       'Created': datetime.datetime(2020, 10, 21, 13, 0, 12, 817000, tzinfo=tzlocal()),
       'Start': datetime.datetime(2020, 10, 21, 13, 4, 1, 58000, tzinfo=tzlocal()),
       'End': datetime.datetime(2020, 10, 21, 13, 4, 55, 710000, tzinfo=tzlocal()),
       'Elapsed': datetime.timedelta(seconds=54, microseconds=652000),
       'Result': 's3://sagemaker-us-west-2-1234567890/papermill_output/scala-spark-test-2020-10-21-20-00-11.ipynb',
       'Input': 's3://sagemaker-us-west-2-1234567890/papermill_input/notebook-2020-10-21-20-00-08.ipynb',
       'Image': 'spark-scala-notebook-runner',
       'Instance': 'ml.m5.large',
       'Role': 'BasicExecuteNotebookRole-us-west-2'}
    """
    session = ensure_session(session)
    client = session.client("sagemaker")

    while True:
        try:
            desc = client.describe_processing_job(ProcessingJobName=job_name)
            break
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "ThrottlingException":
                time.sleep(1)
            else:
                raise e

    status = desc["ProcessingJobStatus"]
    if status == "Completed":
        output_prefix = desc["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
        notebook_name = os.path.basename(desc["Environment"]["PAPERMILL_OUTPUT"])
        result = f"{output_prefix}/{notebook_name}"
    else:
        result = None

    if status == "Failed":
        failure = desc["ExitMessage"]
    else:
        failure = None

    d = {}
    d["Notebook"] = desc["Environment"].get("PAPERMILL_NOTEBOOK_NAME", "")
    d["Rule"] = desc["Environment"].get("AWS_EVENTBRIDGE_RULE", "")
    d["Parameters"] = desc["Environment"].get("PAPERMILL_PARAMS", "")
    d["Job"] = job_name
    d["Status"] = status
    d["Failure"] = failure
    d["Created"] = desc["CreationTime"]
    d["Start"] = desc.get("ProcessingStartTime")
    d["End"] = desc.get("ProcessingEndTime")
    elapsed = None
    if d.get("Start") is not None and d.get("End") is not None:
        elapsed = d["End"] - d["Start"]
    d["Elapsed"] = elapsed
    d["Result"] = result
    d["Input"] = desc["ProcessingInputs"][0]["S3Input"]["S3Uri"]
    d["Image"] = abbreviate_image(desc["AppSpecification"]["ImageUri"])
    d["Instance"] = desc["ProcessingResources"]["ClusterConfig"][
        "InstanceType"]
    d["Role"] = abbreviate_role(desc["RoleArn"])

    return d
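
# Usage sketch: inspect a single run and print its status, elapsed time, and result
# location (the job name below is hypothetical).
def example_describe(job_name="papermill-mynotebook-2020-10-21-20-00-11"):
    d = describe_run(job_name)
    print(d["Status"], d["Elapsed"], d["Result"])
    return d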
def execute_notebook(
    *,
    image,
    input_path,
    output_prefix,
    notebook,
    parameters,
    role=None,
    instance_type,
    session,
):
    session = ensure_session(session)

    if not role:
        role = get_execution_role(session)
    elif "/" not in role:
        account = session.client("sts").get_caller_identity()["Account"]
        role = f"arn:aws:iam::{account}:role/{role}"

    if "/" not in image:
        account = session.client("sts").get_caller_identity()["Account"]
        region = session.region_name
        image = f"{account}.dkr.ecr.{region}.amazonaws.com/{image}:latest"

    if notebook is None:
        notebook = input_path

    base = os.path.basename(notebook)
    nb_name, nb_ext = os.path.splitext(base)
    timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

    job_name = (
        ("papermill-" + re.sub(r"[^-a-zA-Z0-9]", "-", nb_name))[: 62 - len(timestamp)]
        + "-"
        + timestamp
    )
    input_directory = "/opt/ml/processing/input/"
    local_input = os.path.join(input_directory, os.path.basename(notebook))
    result = f"{nb_name}-{timestamp}{nb_ext}"
    local_output = "/opt/ml/processing/output/"

    api_args = {
        "ProcessingInputs": [
            {
                "InputName": "notebook",
                "S3Input": {
                    "S3Uri": input_path,
                    "LocalPath": input_directory,
                    "S3DataType": "S3Prefix",
                    "S3InputMode": "File",
                    "S3DataDistributionType": "FullyReplicated",
                },
            },
        ],
        "ProcessingOutputConfig": {
            "Outputs": [
                {
                    "OutputName": "result",
                    "S3Output": {
                        "S3Uri": output_prefix,
                        "LocalPath": local_output,
                        "S3UploadMode": "EndOfJob",
                    },
                },
            ],
        },
        "ProcessingJobName":
        job_name,
        "ProcessingResources": {
            "ClusterConfig": {
                "InstanceCount": 1,
                "InstanceType": instance_type,
                "VolumeSizeInGB": 40,
            }
        },
        "StoppingCondition": {
            "MaxRuntimeInSeconds": 7200
        },
        "AppSpecification": {
            "ImageUri": image,
            "ContainerArguments": [
                "run_notebook",
            ],
        },
        "RoleArn":
        role,
        "Environment": {},
    }

    api_args["Environment"]["PAPERMILL_INPUT"] = local_input
    api_args["Environment"]["PAPERMILL_OUTPUT"] = local_output + result
    if os.environ.get("AWS_DEFAULT_REGION") is not None:
        api_args["Environment"]["AWS_DEFAULT_REGION"] = os.environ["AWS_DEFAULT_REGION"]
    api_args["Environment"]["PAPERMILL_PARAMS"] = json.dumps(parameters)
    api_args["Environment"]["PAPERMILL_NOTEBOOK_NAME"] = notebook

    # Use the session established above rather than an ad hoc default client.
    client = session.client("sagemaker")
    result = client.create_processing_job(**api_args)
    job_arn = result["ProcessingJobArn"]
    job = re.sub("^.*/", "", job_arn)
    return job
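
# Usage sketch: the lower-level path that run_notebook() wraps: upload the notebook,
# start the Processing job, then poll until it finishes. The image and notebook
# names are hypothetical.
def example_execute(image="notebook-runner", notebook="mynotebook.ipynb"):
    session = ensure_session()
    input_path = upload_notebook(notebook, session)
    job_name = execute_notebook(
        image=image,
        input_path=input_path,
        output_prefix=get_output_prefix(),
        notebook=notebook,
        parameters={},
        instance_type="ml.m5.large",
        session=session,
    )
    return wait_for_complete(job_name, session=session)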