import pytest
from unittest.mock import Mock

from sagemaker import get_execution_role


def test_get_execution_role_throws_exception_if_arn_is_not_role():
    session = Mock()
    session.get_caller_identity_arn.return_value = 'arn:aws:iam::369233609183:user/marcos'

    with pytest.raises(ValueError) as error:
        get_execution_role(session)
    assert 'The current AWS identity is not a role' in str(error.value)


def test_get_execution_role_works_with_service_role():
    session = Mock()
    session.get_caller_identity_arn.return_value = \
        'arn:aws:iam::369233609183:role/service-role/AmazonSageMaker-ExecutionRole-20171129T072388'

    actual = get_execution_role(session)
    assert actual == 'arn:aws:iam::369233609183:role/service-role/AmazonSageMaker-ExecutionRole-20171129T072388'
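

# A minimal sketch of the behavior the two tests above exercise (an illustrative
# assumption, not the SDK's actual implementation):
def _get_execution_role_sketch(sagemaker_session):
    arn = sagemaker_session.get_caller_identity_arn()
    if ':role/' in arn:
        return arn
    raise ValueError('The current AWS identity is not a role: {}'.format(arn))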
import json
import os
from pathlib import Path

import boto3
import sagemaker


def get_current_folder(global_variables):
    # if calling from a file
    if "__file__" in global_variables:
        current_file = Path(global_variables["__file__"])
        current_folder = current_file.parent.resolve()
    # if calling from a notebook
    else:
        current_folder = Path(os.getcwd())
    return current_folder

default_bucket = sagemaker.session.Session(boto3.session.Session()).default_bucket()
default_role = sagemaker.get_execution_role()

cfn_stack_outputs = {}
current_folder = get_current_folder(globals())
cfn_stack_outputs_filepath = Path(current_folder, '../stack_outputs.json').resolve()

if os.path.exists(cfn_stack_outputs_filepath):
    with open(cfn_stack_outputs_filepath) as f:
        cfn_stack_outputs = json.load(f)


solution_prefix = cfn_stack_outputs.get('SolutionPrefix', 'sm-soln-pred-maint')
solution_bucket = cfn_stack_outputs.get('SolutionS3Bucket', default_bucket)
s3_prefix = cfn_stack_outputs.get('SolutionS3Prefix', 'pred-maintenance-artifacts')

training_job_name = cfn_stack_outputs.get('SageMakerTrainingJobName', 'sm-soln-pred-maint-model')
import time

import numpy as np
import sagemaker
from sklearn.model_selection import train_test_split

t1 = time.time()

# Split into training, validation, and test sets
# (df is assumed to be a pandas DataFrame with 'price' and 'sqft_living' columns, loaded earlier)
ys = np.array(df['price']).astype("float32")
xs = np.array(df['sqft_living']).astype("float32").reshape(-1, 1)

np.random.seed(8675309)
train_features, test_features, train_labels, test_labels = train_test_split(
    xs, ys, test_size=0.2)
val_features, test_features, val_labels, test_labels = train_test_split(
    test_features, test_labels, test_size=0.5)

# Train model
linear_model = sagemaker.LinearLearner(role=sagemaker.get_execution_role(),
                                       instance_count=1,
                                       instance_type='ml.m4.xlarge',
                                       predictor_type='regressor')

train_records = linear_model.record_set(train_features,
                                        train_labels,
                                        channel='train')
val_records = linear_model.record_set(val_features,
                                      val_labels,
                                      channel='validation')
test_records = linear_model.record_set(test_features,
                                       test_labels,
                                       channel='test')

linear_model.fit([train_records, val_records, test_records], logs=False)
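
# A hedged follow-up sketch (not part of the original example): deploy the trained
# model to a real-time endpoint, run a quick sanity prediction, then clean up.
predictor = linear_model.deploy(initial_instance_count=1,
                                instance_type='ml.m4.xlarge')
print(predictor.predict(test_features[:5]))
predictor.delete_endpoint()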
Example 5
# Replace **`<LabBucketName>`** with the resource name that was provided with your lab account.

# In[21]:


import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import image_uris
from sagemaker.amazon.amazon_estimator import RecordSet

# Instantiate an Amazon SageMaker session
sess = sagemaker.Session()

# Get the Amazon SageMaker role 
role = get_execution_role()

# Bucket name
bucket = 'ml-pipeline-bucket'

# Get the image URI for the container that includes the linear learner algorithm
container = image_uris.retrieve('linear-learner', boto3.Session().region_name)

print(f'Session {sess}')
print(f'The role is {role}')
print(f'The container is {container} in the {boto3.Session().region_name} region')


# In[22]:

Example 6
def test_get_execution_role():
    session = Mock()
    session.get_caller_identity_arn.return_value = 'arn:aws:iam::369233609183:role/SageMakerRole'

    actual = get_execution_role(session)
    assert actual == 'arn:aws:iam::369233609183:role/SageMakerRole'
Example 7
import time

import boto3
import sagemaker
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from smexperiments.experiment import Experiment
from stepfunctions import steps
from stepfunctions.inputs import ExecutionInput
from stepfunctions.workflow import Workflow


def setup_workflow(project, purpose, workflow_execution_role, script_dir,
                   ecr_repository):
    """ to setup all needed for a step function with sagemaker.
    arg: 
        project: project name under sagemaker
        purpose: subproject
        workflow_execution_role: arn to execute step functions
        script_dir: processing file name, like a .py file
        ecr_repository: ecr repository name
    return:
        workflow: a stepfunctions.workflow.Workflow instance  
    example: 
        PROJECT = '[dpt-proj-2022]'
        PURPOSE = '[processing]'
        WORKFLOW_EXECUTION_ROLE = "arn:aws-cn:iam::[*********]:role/[**************]"
        SCRIPT_DIR = "[processing].py"
        ECR_REPOSITORY = '[ecr-2022]'
    """

    # SageMaker Session setup
    # ========================================================================================
    # SageMaker Session
    # ====================================
    account_id = boto3.client('sts').get_caller_identity().get('Account')
    role = sagemaker.get_execution_role()

    # Storage
    # ====================================
    session = sagemaker.Session()
    region = session.boto_region_name
    s3_output = session.default_bucket()

    # Code storage
    # ==================
    s3_prefix = '{}/{}'.format(project, purpose)
    s3_prefix_code = '{}/code'.format(s3_prefix)
    s3CodePath = 's3://{}/{}/code'.format(s3_output, s3_prefix)

    ## preprocess & prediction
    script_list = [script_dir]

    for script in script_list:
        session.upload_data(script,
                            bucket=session.default_bucket(),
                            key_prefix=s3_prefix_code)

    # ECR environment
    # ====================================
    uri_suffix = 'amazonaws.com.cn'
    tag = ':latest'
    ecr_repository_uri = '{}.dkr.ecr.{}.{}/{}'.format(account_id, region,
                                                      uri_suffix,
                                                      ecr_repository + tag)

    # SageMaker Experiments setup
    # ========================================================================================
    experiment = Experiment.create(
        experiment_name="{}-{}".format(project, int(time.time())),
        description="machine learning project",
        sagemaker_boto_client=boto3.client('sagemaker'))
    print(experiment)

    execution_input = ExecutionInput(schema={
        "ProcessingJobName": str,
        "ResultPath": str,
    })

    # setup script processor
    script_processor = ScriptProcessor(command=['python3'],
                                       image_uri=ecr_repository_uri,
                                       role=role,
                                       instance_count=1,
                                       instance_type='ml.m5.4xlarge')

    # Step
    # ========================================================================================

    optimizing_step = steps.ProcessingStep(
        "Processing Step",
        processor=script_processor,
        job_name=execution_input["ProcessingJobName"],
        inputs=[
            ProcessingInput(source=s3CodePath,
                            destination='/opt/ml/processing/input/code',
                            input_name='code')
        ],
        outputs=[
            ProcessingOutput(output_name=purpose,
                             destination=execution_input["ResultPath"],
                             source='/opt/ml/processing/{}'.format(purpose))
        ],
        container_entrypoint=[
            "python3", "/opt/ml/processing/input/code/" + script_dir
        ],
    )

    # Fail State
    # ========================================================================================
    failed_state = steps.states.Fail("Processing Workflow failed",
                                     cause="SageMakerProcessingJobFailed")

    catch_state_processing = steps.states.Catch(
        error_equals=["States.TaskFailed"], next_step=failed_state)

    # Create Workflow
    # ========================================================================================
    optimizing_step.add_catch(catch_state_processing)

    workflow_name = "workflow-{}-{}".format(project, purpose).upper()
    workflow_graph = steps.Chain([optimizing_step])

    workflow = Workflow(name=workflow_name,
                        definition=workflow_graph,
                        role=workflow_execution_role)

    workflow.create()
    return workflow
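
# A hypothetical invocation of setup_workflow, following the docstring's example
# values (the role ARN, bucket, and job name below are placeholders, not real resources):
workflow = setup_workflow(project='dpt-proj-2022',
                          purpose='processing',
                          workflow_execution_role='arn:aws-cn:iam::111122223333:role/StepFunctionsWorkflowExecutionRole',
                          script_dir='processing.py',
                          ecr_repository='ecr-2022')
execution = workflow.execute(inputs={
    'ProcessingJobName': 'dpt-proj-2022-processing-{}'.format(int(time.time())),
    'ResultPath': 's3://my-bucket/dpt-proj-2022/processing/result',
})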
def role(sagemaker_session):
    return get_execution_role(sagemaker_session)
print(f"Length of train_labels is: {train_labels.shape}")
print(f"Length of val_features is: {val_features.shape}")
print(f"Length of val_labels is: {val_labels.shape}")
print(f"Length of test_features is: {test_features.shape}")
print(f"Length of test_labels is: {test_labels.shape}")


# Now, call the Amazon SageMaker `LinearLearner()` algorithm. This example uses an `ml.m4.xlarge` instance for training. `predictor_type` is set to __'binary_classifier'__ because there are two classes: "spam" and "not spam".

# In[7]:


import sagemaker

# Call the LinearLearner estimator object
binary_estimator = sagemaker.LinearLearner(role=sagemaker.get_execution_role(),
                                               instance_count=1,
                                               instance_type='ml.m4.xlarge',
                                               predictor_type='binary_classifier')


# Use the `record_set()` function of the binary_estimator to set the training, validation, and test parts of the estimator. 

# In[8]:


train_records = binary_estimator.record_set(train_features, train_labels, channel='train')
val_records = binary_estimator.record_set(val_features, val_labels, channel='validation')
test_records = binary_estimator.record_set(test_features, test_labels, channel='test')
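
# A hedged next step (mirroring the regression example earlier on this page): train
# the binary classifier on the three record sets.
binary_estimator.fit([train_records, val_records, test_records], logs=False)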

Example 10
import sagemaker


def get_credentials():
    """Return the SageMaker session, execution role, and default S3 bucket."""
    sage_session = sagemaker.Session()
    role = sagemaker.get_execution_role()
    bucket = sage_session.default_bucket()
    return sage_session, role, bucket
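
# Illustrative usage of the helper above (assumes AWS credentials and a SageMaker
# execution role are available in the environment):
sage_session, role, bucket = get_credentials()
print('Using role {} and default bucket {}'.format(role, bucket))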
Example 11
import time

import boto3
import sagemaker as sage
from sagemaker import get_execution_role

# Note: InferenceSpecification, ModelPackageValidationSpecification, and `args`
# are defined elsewhere in the original project.


def build_model(model_name, app, desc):
    # role
    common_prefix = "DEMO-gluoncv-model-zoo"
    training_input_prefix = common_prefix + "/training-input-data"
    role = get_execution_role()
    sess = sage.Session()

    # create estimator
    account = sess.boto_session.client('sts').get_caller_identity()['Account']
    region = sess.boto_session.region_name
    image = '{}.dkr.ecr.{}.amazonaws.com/gluoncv-{}:latest'.format(
        account, region, app.replace('_', '-'))
    TRAINING_WORKDIR = "data/training"

    training_input = sess.upload_data(TRAINING_WORKDIR,
                                      key_prefix=training_input_prefix)
    # print ("Training Data Location " + training_input)
    classifier = sage.estimator.Estimator(
        image,
        role,
        1,
        'ml.c4.xlarge',
        output_path="s3://{}/output".format(sess.default_bucket()),
        sagemaker_session=sess,
        hyperparameters={'model_name': model_name})
    classifier.fit(training_input)

    TRANSFORM_WORKDIR = "data/transform"
    batch_inference_input_prefix = common_prefix + "/batch-inference-input-data"
    transform_input = sess.upload_data(
        TRANSFORM_WORKDIR,
        key_prefix=batch_inference_input_prefix) + "/cat1.jpg"

    # deploy
    if args.deploy_test:
        model = classifier.create_model()
        predictor = classifier.deploy(1, 'ml.m4.xlarge')
        with open('data/transform/cat1.jpg', 'rb') as f:
            x = f.read()
            # print(predictor.predict(x, initial_args={'ContentType':'image/jpeg'}).decode('utf-8'))
        sess.delete_endpoint(predictor.endpoint)

    smmp = boto3.client(
        'sagemaker',
        region_name=region,
        endpoint_url="https://sagemaker.{}.amazonaws.com".format(region))
    modelpackage_inference_specification = InferenceSpecification(
    ).get_inference_specification_dict(
        ecr_image=image,
        supports_gpu=True,
        supported_content_types=["image/jpeg", "image/png"],
        supported_mime_types=["text/plain", "application/json"])

    # Specify the model data resulting from the previously completed training job
    modelpackage_inference_specification["InferenceSpecification"][
        "Containers"][0]["ModelDataUrl"] = classifier.model_data

    # validation specification
    modelpackage_validation_specification = ModelPackageValidationSpecification(
    ).get_validation_specification_dict(validation_role=role,
                                        batch_transform_input=transform_input,
                                        content_type="image/jpeg",
                                        instance_type="ml.c4.xlarge",
                                        output_s3_location='s3://{}/{}'.format(
                                            sess.default_bucket(),
                                            common_prefix))

    model_package_name = "gluoncv-{}-".format(
        model_name.replace('_', '-').replace('.', '-')) + str(
            round(time.time()))
    create_model_package_input_dict = {
        "ModelPackageName": model_package_name,
        "ModelPackageDescription": model_name + desc,
        "CertifyForMarketplace": True
    }
    create_model_package_input_dict.update(
        modelpackage_inference_specification)
    create_model_package_input_dict.update(
        modelpackage_validation_specification)

    smmp.create_model_package(**create_model_package_input_dict)

    while True:
        response = smmp.describe_model_package(
            ModelPackageName=model_package_name)
        status = response["ModelPackageStatus"]
        # print (model_name, ':', status)
        if (status == "Completed" or status == "Failed"):
            #print (response["ModelPackageStatusDetails"])
            break
        time.sleep(5)
Example 12
parser.add_argument('--max-parallel-jobs',
                    type=int,
                    default=3,
                    help='Maximum parallel jobs')
parser.add_argument('--wait', dest='wait', action='store_true')
parser.add_argument('--no-wait', dest='wait', action='store_false')
parser.set_defaults(wait=True)
args = parser.parse_args()

sagemaker_session = sagemaker.Session()

bucket = sagemaker_session.default_bucket()
prefix = 'sagemaker/pytorch-cifar10'

# If you are running this outside of a sagemaker notebook, you must set SAGEMAKER_ROLE
role = os.getenv('SAGEMAKER_ROLE') or sagemaker.get_execution_role()
wandb.sagemaker_auth(path="source")

# Ensure training data is stored in s3
inputs = "s3://{}/{}".format(bucket, prefix)
try:
    file = os.path.join(prefix, 'cifar-10-python.tar.gz')
    sagemaker_session.boto_session.resource('s3').Object(bucket, file).load()
except botocore.exceptions.ClientError as e:
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = torchvision.datasets.CIFAR10('data',
                                            download=True,
Example 13
import os

import boto3
from torchvision import datasets, transforms
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

DIR_PATH = os.path.dirname(os.path.realpath(__file__))

if __name__ == '__main__':
    boto_session = boto3.Session(profile_name="packt-sagemaker")
    sagemaker_session = sagemaker.Session(boto_session=boto_session)

    bucket = sagemaker_session.default_bucket()
    prefix = 'sagemaker/DEMO-pytorch-fashion-mnist'

    role = sagemaker.get_execution_role(sagemaker_session)

    datasets.FashionMNIST(
        'data',
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(
            )  # Convert a PIL Image or numpy.ndarray to tensor, default : range [0, 255] -> [0.0,1.0].
        ]))

    inputs = sagemaker_session.upload_data(path='data',
                                           bucket=bucket,
                                           key_prefix=prefix)
    print('input spec (in this case, just an S3 path): {}'.format(inputs))

    entry_point_path = os.path.join(DIR_PATH, "fashion_mnist.py")
Example 14
import json
import logging
import os
from uuid import uuid4

import boto3
import sagemaker
from sagemaker.tensorflow import TensorFlow

logger = logging.getLogger(__name__)


def train(source_dir,
          data_path='doodle/data',
          training_steps=20000,
          evaluation_steps=2000,
          train_instance_type='local',
          train_instance_count=1,
          run_tensorboard_locally=True,
          uid=None,
          role=None,
          bucket=None,
          profile_name=None):
    assert os.path.exists(source_dir)
    boto_session = boto3.Session(profile_name=profile_name)
    session = sagemaker.Session(boto_session=boto_session)
    role = role if role is not None else sagemaker.get_execution_role()
    bucket = bucket if bucket is not None else session.default_bucket()
    uid = uid if uid is not None else uuid4()
    logger.debug(session.get_caller_identity_arn())
    role = session.expand_role(role)

    params = {
        'train_tfrecord_file': 'train.tfr',
        'test_tfrecord_file': 'test.tfr',
        'samples_per_epoch': 700000,
        'save_summary_steps': 100,
    }

    output_path = 's3://{}/doodle/model/{}/export'.format(bucket, uid)
    checkpoint_path = 's3://{}/doodle/model/{}/ckpt'.format(bucket, uid)
    code_location = 's3://{}/doodle/model/{}/source'.format(bucket, uid)
    base_job_name = 'doodle-training-job-{}'.format(uid)
    data_dir = 's3://{}/{}'.format(bucket, data_path)

    logger.info('uid          : {}'.format(uid))
    logger.info('execution_role     : {}'.format(role))
    logger.info('data_dir       : {}'.format(data_dir))
    logger.info('output_path      : {}'.format(output_path))
    logger.info('checkpoint_path    : {}'.format(checkpoint_path))
    logger.info('code_location    : {}'.format(code_location))
    logger.info('base_job_name    : {}'.format(base_job_name))
    logger.info('training_steps     : {}'.format(training_steps))
    logger.info('evaluation_steps   : {}'.format(evaluation_steps))
    logger.info('train_instance_count : {}'.format(train_instance_count))
    logger.info('train_instance_type  : {}'.format(train_instance_type))
    logger.info('hyperparameters    : {}'.format(json.dumps(params)))

    estimator = TensorFlow(hyperparameters=params,
                           output_path=output_path,
                           checkpoint_path=checkpoint_path,
                           code_location=code_location,
                           base_job_name=base_job_name,
                           source_dir=source_dir,
                           entry_point='doodle.py',
                           framework_version='1.6',
                           role=role,
                           training_steps=training_steps,
                           evaluation_steps=evaluation_steps,
                           train_instance_count=train_instance_count,
                           train_instance_type=train_instance_type)

    estimator.fit(data_dir, run_tensorboard_locally=run_tensorboard_locally)
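

# A hedged usage sketch (directory, instance type, and AWS profile are placeholders):
if __name__ == '__main__':
    train(source_dir='model',
          train_instance_type='ml.p2.xlarge',
          profile_name='default')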
Example 15
# 70% of the data for training & 30% for validation
#Code-1---------------------------------------------------------------------------------------------------------------

from sagemaker import get_execution_role

#Bucket location to save your custom code in tar.gz format.
custom_code_upload_location = 's3://sagemaker-02122018/customcode/tensorflow_iris'

#Bucket location where results of model training are saved.
model_artifacts_location = 's3://sagemaker-02122018/artifacts'

#IAM execution role that gives SageMaker access to resources in your AWS account.
role = get_execution_role()  # The get_execution_role function retrieves the IAM role you created at the time of creating your notebook instance.

#Code-2---------------------------------------------------------------------------------------------------------------


 !cat "iris_dnn_classifier.py"
 
 #Code-3---------------------------------------------------------------------------------------------------------------
 from sagemaker.tensorflow import TensorFlow #Importing tensorflow class from sagemaker

iris_estimator = TensorFlow(entry_point='iris_dnn_classifier.py',  # Create the estimator, passing the entry-point Python script
                            role=role, #IAM Role
                            framework_version='1.11.0',
                            output_path=model_artifacts_location,
                            code_location=custom_code_upload_location,
                            train_instance_count=1,  # number of instances
                            train_instance_type='ml.c4.xlarge',
                            training_steps=1000,  
Example 16
def _get_default_role():
    role = get_execution_role()
    if 'partial_role' in _defaults:
        role = _defaults['partial_role']
    return role
sm = boto3.Session().client(service_name='sagemaker', region_name=region)

# Start the AutoML training job
sm.create_auto_ml_job(AutoMLJobName=auto_ml_job_name,
                      InputDataConfig=[{
                          'DataSource': {
                              'S3DataSource': {
                                  'S3DataType': 'S3Prefix',
                                  'S3Uri': s3_data_dir_path
                              }
                          },
                          'TargetAttributeName':
                          target_column_name
                      }],
                      OutputDataConfig={'S3OutputPath': s3_data_output_path},
                      RoleArn=get_execution_role())

# Check the training job status
while True:
    resp = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    if resp['AutoMLJobStatus'] in ('Completed', 'Failed', 'Stopped'):
        break
    time.sleep(10)

# Create a model from the most accurate candidate
best_candidate = resp['BestCandidate']
model_name = auto_ml_job_name + '-model'
model_arn = sm.create_model(Containers=best_candidate['InferenceContainers'],
                            ModelName=model_name,
                            ExecutionRoleArn=get_execution_role())
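
# A hedged follow-up (not in the original snippet): host the created model behind a
# real-time endpoint. The endpoint/config names and instance type are assumptions.
endpoint_config_name = model_name + '-endpoint-config'
sm.create_endpoint_config(EndpointConfigName=endpoint_config_name,
                          ProductionVariants=[{
                              'VariantName': 'AllTraffic',
                              'ModelName': model_name,
                              'InstanceType': 'ml.m5.large',
                              'InitialInstanceCount': 1,
                          }])
sm.create_endpoint(EndpointName=model_name + '-endpoint',
                   EndpointConfigName=endpoint_config_name)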
import json
from pathlib import Path

import sagemaker
from sagemaker.predictor import json_serializer


def train_deploy_chat_bert():

    role = sagemaker.get_execution_role()
    session = sagemaker.Session()

    # location for train.csv, val.csv and labels.csv
    DATA_PATH = Path("../data/")

    # Location for storing training_config.json
    CONFIG_PATH = DATA_PATH/'config'
    CONFIG_PATH.mkdir(exist_ok=True)

    # S3 bucket name
    bucket = 'sagemaker-deep-learning-bert-v2'

    # Prefix for S3 bucket for input and output
    prefix = 'sagemaker-deep-learning-bert-v2/input'
    prefix_output = 'sagemaker-deep-learning-bert-v2/output'

    hyperparameters = dict(epochs=10, lr=8e-5, max_seq_length=512, train_batch_size=16, lr_schedule="warmup_cosine",
                           warmup_steps=1000, optimizer_type="adamw")



    training_config = dict(run_text="text-classification-bert", finetuned_model=None, do_lower_case="True",
                           train_file="train.csv", val_file="val.csv", label_file="labels.csv", text_col="message_body",
                   label_col='["abusive","asking_exchange","normal","offline_sell","possible_fraud","sharing_contact_details"]',
                           multi_label="True", grad_accumulation_steps="1", fp16_opt_level="O1", fp16="True",
                           model_type="roberta", model_name="roberta-base", logging_steps="300")
    with open(CONFIG_PATH/'training_config.json', 'w') as f:
        json.dump(training_config, f)

   

    # Upload the data to the S3 bucket
    s3_input = session.upload_data(DATA_PATH, bucket=bucket, key_prefix=prefix)

    session.upload_data(str(DATA_PATH/'labels.csv'), bucket=bucket, key_prefix=prefix)
    session.upload_data(str(DATA_PATH/'train.csv'), bucket=bucket, key_prefix=prefix)
    session.upload_data(str(DATA_PATH/'val.csv'), bucket=bucket, key_prefix=prefix)

    # Create an estimator and start training

    account = session.boto_session.client('sts').get_caller_identity()['Account']
    region = session.boto_session.region_name

    image = "{}.dkr.ecr.{}.amazonaws.com/fluent-sagemaker-fast-bert:1.0-gpu-py36".format(account, region)

    output_path = "s3://{}/{}".format(bucket, prefix_output)

    estimator = sagemaker.estimator.Estimator(image,
                                              role,
                                              train_instance_count=1,
                                              train_instance_type='ml.p2.xlarge',
                                              output_path=output_path,
                                              base_job_name='bert-text-classification-v1',
                                              hyperparameters=hyperparameters,
                                              sagemaker_session=session
                                             )

    estimator.fit(s3_input)


    # Deploy the model to hosting service


    predictor = estimator.deploy(1,
                                 'ml.m5.large',
                                 endpoint_name='bert-text-classification-v1',
                                 serializer=json_serializer)
Example 19
    'servable_model_dir': '/opt/ml/model',
    'loss_weight': 1.0,
    'use_context': True,
    'max_click_history': 30,
    'num_epochs': 1,
    'max_title_length': 16,
    'entity_dim': 128,
    'word_dim': 300,
    'batch_size': 128,
    'perform_shuffle': 1,
    'checkpointPath': '/opt/ml/checkpoints'
}

byoc_est = sagemaker.estimator.Estimator(
    '662566784674.dkr.ecr.ap-northeast-1.amazonaws.com/gw-dkn:20201114025113',
    role=sagemaker.get_execution_role(),
    train_instance_count=1,
    train_instance_type=train_instance_type,
    base_job_name='dkn-byoc',
    hyperparameters=hyperparameters)

train_s3 = "s3://leigh-gw/train.csv/"
test_s3 = "s3://leigh-gw/test.csv/"
inputs = {'train': train_s3, 'eval': test_s3}

train_config = training_config(estimator=byoc_est, inputs=inputs)


# step - trigger CDK to deploy model as ECS service using Airflow Python Operator
def dkn_model_deploy(data, **context):
    print("mock for dkn deployment")
Example 20
def __init__(self, aws_profile, aws_region):
    self.boto_session = boto3.Session(profile_name=aws_profile,
                                      region_name=aws_region)
    self.sagemaker_session = sage.Session(boto_session=self.boto_session)
    self.role = sage.get_execution_role(self.sagemaker_session)
Example 21
import s3fs
import sagemaker
from sagemaker import get_execution_role

#%% [markdown]
# Start by specifying:
# - The Amazon S3 bucket and prefix that you want to use for the training and model data. This should be within the same region as the notebook, training, and hosting instances. Replace `'FILL_IN_LAB_BUCKET_NAME'` below with the name of the lab's S3 bucket; this can be found in Qwiklabs along the left-hand side, under *LabBucket*. Just copy and paste that bucket name into the appropriate location in the code cell below before running the cell.
# - The IAM role ARN is used to give training and hosting access to your data. See the documentation for how to create these. Here, the `get_execution_role` function obtains the role ARN, which was specified when creating the notebook.

#%%
bucket = 'FILL_IN_LAB_BUCKET_NAME'
prefix = 'demos/deepar/forecast-electricity'
assert bucket != 'FILL_IN_LAB_BUCKET_NAME', 'Student needs to set bucket to match the lab S3 bucket name.'

sagemaker_session = sagemaker.Session()
role = get_execution_role()

s3_data_path = "{}/{}/data".format(bucket, prefix)
s3_output_path = "{}/{}/output".format(bucket, prefix)
print('Data location: %s' % s3_data_path)
print('Output location: %s' % s3_output_path)

#%% [markdown]
# # Real Data Set: Electricity Consumption <a name="data"></a>
#
# **Dataset License and Information:**
#
# For this lab, you will be using an open source dataset entitled, [“Individual Household Electric Power Consumption”](https://archive.ics.uci.edu/ml/datasets/Individual+household+electric+power+consumption) that comes from the UCI Machine Learning Repository. Information about the dataset license can be found below.
#
# The MIT License (MIT) Copyright © [2017] Zalando SE, [https://tech.zalando.com](https://tech.zalando.com).
#
"""Creates a FLow Definition ARN for A2I"""
import boto3
import io
import json
import uuid
import botocore
import time
import botocore
from sagemaker import get_execution_role

ROLE = get_execution_role()
REGION = ""
WORKTEAM_ARN = ""
HUMAN_TASK_ARN = ""
BUCKET = ""
OUTPUT_PATH = f's3://{BUCKET}/a2i_results'


def create_flow_definition(flow_definition_name, human_task_arn, task_title):
    '''
    Creates Flow Definition resource.
    Returns FlowDefinitionArn
    '''
    # Amazon SageMaker client
    SAGEMAKER_CLIENT = boto3.client('sagemaker', REGION)

    # A2I Runtime client
    A2I_RUNTIME_CLIENT = boto3.client('sagemaker-a2i-runtime', REGION)

    response = SAGEMAKER_CLIENT.create_flow_definition(
        FlowDefinitionName=flow_definition_name,
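        # The original example is cut off here; the remaining arguments are a hedged
        # sketch based on the boto3 create_flow_definition API (values are assumptions).
        HumanLoopConfig={
            'WorkteamArn': WORKTEAM_ARN,
            'HumanTaskUiArn': human_task_arn,
            'TaskTitle': task_title,
            'TaskDescription': 'Review the prediction and correct it if needed',
            'TaskCount': 1,
        },
        OutputConfig={'S3OutputPath': OUTPUT_PATH},
        RoleArn=ROLE)

    return response['FlowDefinitionArn']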
Example 24
"""
runs script train_val_test_split on an ec2 instance as a sagemaker processing job
"""
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
import sagemaker
import boto3
## locations and vars
BUCKET = sagemaker.Session().default_bucket()
INPUT_FOLDER = 'stock-data-raw-csv'
OUTPUT_FOLDER = 'DEMO-xgboost-as-a-built-in-algo'
ROLE_ARN = sagemaker.get_execution_role()
## image uri code
ACCOUNT_ID = boto3.client('sts').get_caller_identity().get('Account')
REGION = boto3.Session().region_name
ECR_REPOSITORY = 'sagemaker-processing-container'
TAG = ':latest'
IMAGE_URI = '{}.dkr.ecr.{}.amazonaws.com/{}'.format(ACCOUNT_ID, REGION,
                                                    ECR_REPOSITORY + TAG)
## call processing job
script_processor = ScriptProcessor(command=['python3'],
                                   image_uri=IMAGE_URI,
                                   role=ROLE_ARN,
                                   instance_count=1,
                                   instance_type='ml.m5.xlarge')

script_processor.run(
    code='train_val_test_split.py',
    inputs=[
        ProcessingInput(source=f's3://{BUCKET}/{INPUT_FOLDER}/',
                        destination='/opt/ml/processing/input')
    ],
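    # The original example is cut off here; the outputs below are a hedged completion
    # of the run() call (the destination prefix is an assumption).
    outputs=[
        ProcessingOutput(output_name='split-data',
                         source='/opt/ml/processing/output',
                         destination=f's3://{BUCKET}/{OUTPUT_FOLDER}/')
    ],
)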
Example 25
# Environment

import sagemaker

bucket = sagemaker.Session().default_bucket() # we are using a default bucket here but you can change it to any bucket in your account
prefix = 'sagemaker/DEMO-hpo-tensorflow-high' # you can customize the prefix (subfolder) here

role = sagemaker.get_execution_role() # we are using the notebook instance role for training in this example


import boto3
from time import gmtime, strftime
from sagemaker.tensorflow import TensorFlow
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

#Download MNIST dataset
import utils
from tensorflow.contrib.learn.python.learn.datasets import mnist
import tensorflow as tf

data_sets = mnist.read_data_sets('data', dtype=tf.uint8, reshape=False, validation_size=5000)
utils.convert_to(data_sets.train, 'train', 'data')
utils.convert_to(data_sets.validation, 'validation', 'data')
utils.convert_to(data_sets.test, 'test', 'data')

#Upload the data
inputs = sagemaker.Session().upload_data(path='data', bucket=bucket, key_prefix=prefix+'/data/mnist')
print (inputs)


#Script for distributed training
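
# The example is cut off here (the distributed training script would normally follow).
# Below is a hedged sketch of the tuning setup the imports above point toward; the
# entry point, metric regex, and hyperparameter range are assumptions for illustration.
estimator = TensorFlow(entry_point='mnist.py',
                       role=role,
                       framework_version='1.12.0',
                       training_steps=1000,
                       evaluation_steps=100,
                       train_instance_count=1,
                       train_instance_type='ml.m4.xlarge',
                       base_job_name='DEMO-hpo-tensorflow')

hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.01, 0.2)}

tuner = HyperparameterTuner(estimator,
                            objective_metric_name='loss',
                            hyperparameter_ranges=hyperparameter_ranges,
                            metric_definitions=[{'Name': 'loss',
                                                 'Regex': 'loss = ([0-9\\.]+)'}],
                            objective_type='Minimize',
                            max_jobs=9,
                            max_parallel_jobs=3)

tuner.fit(inputs)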
Example 26
def main():
    # define some configurations from env

    # AWS-specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', None)
    AWS_PROFILE = os.getenv('AWS_PROFILE', None)
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())

    # MLOps-specific
    PIPELINE_NAME = os.getenv('PIPELINE_NAME', 'stsPipeline')
    MODEL_PACKAGE_GROUP_NAME = os.getenv('MODEL_PACKAGE_GROUP_NAME',
                                         'sts-sklearn-grp')
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')

    outputs = {'pipeline': None, 'baseline': None, 'train': None}

    try:
        # define the ml pipeline for training
        pipe = get_pipeline(AWS_DEFAULT_REGION,
                            sm_session,
                            role=ROLE_ARN,
                            pipeline_name=PIPELINE_NAME,
                            model_package_group_name=MODEL_PACKAGE_GROUP_NAME,
                            base_job_prefix=BASE_JOB_PREFIX)

        # output debug information
        parsed = json.loads(pipe.definition())
        outputs['pipeline'] = parsed
        _l.debug('ML Pipeline definition')
        _l.debug(json.dumps(parsed, indent=2, sort_keys=True))

        # Created/Updated SageMaker Pipeline
        upsert_response = pipe.upsert(role_arn=ROLE_ARN)
        _l.debug(
            f"C/U SageMaker Pipeline: Response received: {upsert_response}")

        _l.info("Starting the SageMaker pipeline")
        execution = pipe.start()
        _l.info("Waiting for the pipeline")
        execution.wait()

        _l.info("Pipeline finished: !!!")
        _l.debug(f"{pprint.pformat(execution.list_steps())}")

        # Take the S3 URI of the baseline dataset baseline.csv
        mse_step = extract_step_from_list(parsed.get('Steps'),
                                          'CheckMSESTSEvaluation')
        mon_step = extract_step_from_list(
            mse_step.get('Arguments').get('IfSteps'), 'SetupMonitoringData')

        outputs['baseline'] = get_outputs(mon_step)
        # Take the S3 URIs of the train, validation, and test datasets
        train_step_def = extract_step_from_list(parsed.get('Steps'),
                                                'PreprocessSTSData')
        outputs['train'] = get_outputs(train_step_def)
        # --

        # write the pipeline definition and the selected outputs to a JSON
        # file
        with open('trainmodel_out.json', 'w') as f:
            json.dump(outputs, f)
        # ---
    except Exception as e:
        _l.exception(f"Exception: {e}")