Example #1
from sagemaker.sklearn.estimator import SKLearn

# get_sagemaker_session() and aws_role are assumed to be defined elsewhere in
# the project.


class SagemakerRegression:
    def __init__(self, script_path, data_path, hyperparameters):
        self.script_path = script_path
        self.data_path = data_path
        self.hyperparameters = hyperparameters

        self.create_sagemaker_session()
        self.create_sklearn_estimator()

    def create_sagemaker_session(self):
        self.sagemaker_session = get_sagemaker_session()

    def create_sklearn_estimator(self):
        self.estimator = SKLearn(
            self.script_path,
            instance_type="ml.m4.xlarge",
            framework_version="0.20.0",
            sagemaker_session=self.sagemaker_session,
            role=aws_role,
            metric_definitions=[
                {"Name": "train:mse", "Regex": "Train_mse=(.*?);"},
                {"Name": "test:mse", "Regex": "Test_mse=(.*?);"},
            ],
            hyperparameters=self.hyperparameters,
        )

    def fit(self):
        self.estimator.fit({"train": self.data_path}, wait=False)

    def get_training_name(self):
        return self.estimator.latest_training_job.job_name
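A minimal usage sketch for the class above, assuming get_sagemaker_session() and aws_role are provided by the surrounding project; the script path, S3 prefix, and hyperparameter below are placeholders:

regression = SagemakerRegression(
    script_path="train.py",                       # hypothetical entry point
    data_path="s3://my-bucket/regression/train",  # hypothetical S3 prefix
    hyperparameters={"alpha": 0.1},               # hypothetical hyperparameter
)
regression.fit()  # returns immediately; the job is started with wait=False
print(regression.get_training_name())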
Example #2
from typing import Any, Dict

from sagemaker.sklearn.estimator import SKLearn


def train_model_sagemaker(X_train_path: str,
                          sklearn_estimator_kwargs: Dict[str, Any]) -> str:
    """Train the linear regression model on SageMaker.

    Args:
        X_train_path: Full S3 path to `X_train` dataset.
        sklearn_estimator_kwargs: Keyword arguments used to instantiate
            the SKLearn estimator.

    Returns:
        Full S3 path to `model.tar.gz` file containing the model artifact.

    """
    sklearn_estimator = SKLearn(**sklearn_estimator_kwargs)

    # we need a path to the directory containing both
    # X_train (feature table) and y_train (target variable)
    inputs_dir = X_train_path.rsplit("/", 1)[0]
    inputs = {"train": inputs_dir}

    # wait=True ensures that the execution is blocked
    # until the job finishes on SageMaker
    sklearn_estimator.fit(inputs=inputs, wait=True)

    training_job = sklearn_estimator.latest_training_job
    job_description = training_job.describe()
    model_path = job_description["ModelArtifacts"]["S3ModelArtifacts"]
    return model_path
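A hedged sketch of calling this helper; the role ARN, framework settings, and S3 path are placeholders, not values from the source:

estimator_kwargs = {
    "entry_point": "train.py",  # hypothetical training script
    "role": "arn:aws:iam::123456789012:role/SageMakerRole",  # placeholder ARN
    "instance_count": 1,
    "instance_type": "ml.m5.large",
    "framework_version": "0.23-1",
    "py_version": "py3",
}
model_s3_path = train_model_sagemaker(
    "s3://my-bucket/data/X_train.csv",  # hypothetical dataset location
    estimator_kwargs,
)
print(model_s3_path)  # full S3 path to model.tar.gz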
Example #3
import os
import subprocess

import pytest
from sagemaker.sklearn.estimator import SKLearn

# DATA_DIR and the PRIVATE_* constants come from the sagemaker-python-sdk
# integration test suite.


def test_github_with_ssh_passphrase_not_configured(sagemaker_local_session,
                                                   sklearn_latest_version,
                                                   sklearn_latest_py_version):
    script_path = "mnist.py"
    data_path = os.path.join(DATA_DIR, "sklearn_mnist")
    git_config = {
        "repo": PRIVATE_GIT_REPO_2FA_SSH,
        "branch": PRIVATE_BRANCH_2FA,
        "commit": PRIVATE_COMMIT_2FA,
    }
    source_dir = "sklearn"

    sklearn = SKLearn(
        entry_point=script_path,
        role="SageMakerRole",
        source_dir=source_dir,
        instance_count=1,
        instance_type="local",
        sagemaker_session=sagemaker_local_session,
        framework_version=sklearn_latest_version,
        py_version=sklearn_latest_py_version,
        hyperparameters={"epochs": 1},
        git_config=git_config,
    )
    train_input = "file://" + os.path.join(data_path, "train")
    test_input = "file://" + os.path.join(data_path, "test")

    with pytest.raises(subprocess.CalledProcessError) as error:
        sklearn.fit({"train": train_input, "test": test_input})
    assert "returned non-zero exit status" in str(error)
Example #4
def cloud():
    # role, sagemaker_session, and train_path are assumed to be defined by
    # the caller.
    sklearn = SKLearn(entry_point='train.py',
                      framework_version="0.23-1",
                      instance_count=1,
                      instance_type='ml.c4.xlarge',
                      py_version="py3",
                      role=role,  # SKLearn takes `role`, not `sagemaker_role`
                      sagemaker_session=sagemaker_session,
                      source_dir='./src/')

    sklearn.fit({'train': train_path})
Example #5
import os

import numpy
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.sklearn.model import SKLearnModel

# DATA_DIR, LOCK_PATH, lock, and the PRIVATE_* constants come from the
# sagemaker-python-sdk integration test suite.


def test_private_github_with_2fa(sagemaker_local_session,
                                 sklearn_latest_version,
                                 sklearn_latest_py_version):
    script_path = "mnist.py"
    data_path = os.path.join(DATA_DIR, "sklearn_mnist")
    git_config = {
        "repo": PRIVATE_GIT_REPO_2FA,
        "branch": PRIVATE_BRANCH_2FA,
        "commit": PRIVATE_COMMIT_2FA,
        "2FA_enabled": True,
        "token": "",  # TODO: find a secure approach
    }
    source_dir = "sklearn"

    sklearn = SKLearn(
        entry_point=script_path,
        role="SageMakerRole",
        source_dir=source_dir,
        py_version=sklearn_latest_py_version,
        instance_count=1,
        instance_type="local",
        sagemaker_session=sagemaker_local_session,
        framework_version=sklearn_latest_version,
        hyperparameters={"epochs": 1},
        git_config=git_config,
    )
    train_input = "file://" + os.path.join(data_path, "train")
    test_input = "file://" + os.path.join(data_path, "test")
    sklearn.fit({"train": train_input, "test": test_input})

    assert os.path.isdir(sklearn.source_dir)

    with lock.lock(LOCK_PATH):
        predictor = None
        try:
            client = sagemaker_local_session.sagemaker_client
            desc = client.describe_training_job(
                TrainingJobName=sklearn.latest_training_job.name)
            model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
            model = SKLearnModel(
                model_data,
                "SageMakerRole",
                entry_point=script_path,
                framework_version=sklearn_latest_version,
                source_dir=source_dir,
                sagemaker_session=sagemaker_local_session,
                git_config=git_config,
            )
            predictor = model.deploy(1, "local")

            data = numpy.zeros((100, 784), dtype="float32")
            result = predictor.predict(data)
            assert result is not None
        finally:
            if predictor is not None:  # deploy() may have raised
                predictor.delete_endpoint()
Example #6
def cloud():
    # role, sagemaker_session, and train_path are assumed to be defined by the
    # caller; the train_instance_* arguments are SageMaker Python SDK v1 names
    # (renamed to instance_count/instance_type in SDK v2).
    sklearn = SKLearn(entry_point='train.py',
                      source_dir='./src/',
                      role=role,
                      train_instance_count=1,
                      train_instance_type='ml.c4.xlarge',
                      sagemaker_session=sagemaker_session,
                      hyperparameters={
                          'max_depth': 5,
                          'n_estimators': 10
                      })

    sklearn.fit({'train': train_path})
Example #7
def local():
    # Runs in SageMaker local mode, which requires Docker; role is assumed to
    # be defined, and pandas (pd) and sklearn.metrics.accuracy_score imported.
    sklearn = SKLearn(entry_point='train.py',
                      source_dir='./src/',
                      role=role,
                      train_instance_count=1,
                      train_instance_type='local',
                      hyperparameters={
                          'max_depth': 5,
                          'n_estimators': 10
                      })

    sklearn.fit({'train': 'file://models/train.csv'})
    predictor = sklearn.deploy(initial_instance_count=1, instance_type='local')
    test_data = pd.read_csv('./models/test.csv', header=None)
    test_y = test_data.iloc[:, 0]
    test_x = test_data.iloc[:, 1:]
    test_y_preds = predictor.predict(test_x)
    accuracy = accuracy_score(test_y, test_y_preds)
    print('Prediction accuracy score:', accuracy)
Example #8
def test_training_script_in_local_container(inspectlocal):
    # Paths are relative to the working directory; role is assumed to be
    # available to the test.
    code_path = "../../src/mlmax/train.py"
    train_data_path = "opt/ml/processing/train/"
    test_data_path = "opt/ml/processing/test/"

    sklearn = SKLearn(
        entry_point=code_path,
        role=role,
        py_version="py3",
        framework_version="0.20.0",
        instance_type="local",
        hyperparameters={"inspect": True if inspectlocal else None},
    )
    sklearn.fit(
        {
            "train": "file://" + train_data_path,
            "test": "file://" + test_data_path
        },
        wait=True,
    )
Example #9
def train():
    try:
        # Create a sagemaker.sklearn.SKLearn estimator. TRAIN_SCRIPT, SOURCE,
        # ROLE, and TRAIN_DATA are assumed to be module-level constants.
        aws_sklearn = SKLearn(entry_point=TRAIN_SCRIPT,
                              source_dir=SOURCE,
                              train_instance_type='ml.m4.xlarge',
                              role=ROLE)

        # Call fit on the SKLearn estimator, which runs our Python script to
        # train the model.
        aws_sklearn.fit({'train': TRAIN_DATA})

        # Deploy the model created in the previous step and create an endpoint.
        aws_sklearn_predictor = aws_sklearn.deploy(instance_type='ml.m4.xlarge',
                                                   initial_instance_count=1)

    except Exception as e:
        return e
    else:
        return 'success'
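A short sketch of calling the helper above, given that it returns 'success' on completion and the caught exception otherwise:

result = train()
if result == 'success':
    print('Training and deployment finished')
else:
    print('Training failed:', result)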
Example #10
import json

import boto3
import sagemaker
from sagemaker.sklearn.estimator import SKLearn


def train_deploy_model(
        keys,
        instance='ml.m4.xlarge',  # Don't change this!
        instance_count=1,  # Don't change this!
        model_path='tmp/model/model.py',
        key_bucket='tmp/train/embeddings',  # Was tmp/data/data.pickle; data.pickle is hard-coded inside the function
        update=True,  # This should always be true if there is an open endpoint
        hyperparms=None):
    """Train a SageMaker model and deploy it.

    Args:
        keys (str): Path to a JSON file with credential keys.
        instance (str): Instance type used to train and deploy the model.
        instance_count (int): Initial instance count for deploying the model.
        model_path (str): Path to the script that defines the model.
        key_bucket (str): S3 key prefix for the training data.
        update (bool): Whether to update an existing endpoint.
        hyperparms (dict): Hyperparameters for the SVM.

    Returns:
        None. Prints the deployment status.
    """
    with open(keys) as k:
        keys = json.load(k)

    session = boto3.session.Session(
        aws_access_key_id=keys["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=keys["AWS_SECRET_ACCESS_KEY"],
        region_name=keys["REGION_NAME"])

    #sagemaker_session = sagemaker.local.LocalSession(boto_session = session)
    sagemaker_session = sagemaker.Session(boto_session=session)
    if not hyperparms:
        print(model_path)
        sklearn = SKLearn(entry_point=model_path,
                          train_instance_type=instance,
                          role=keys["ROLE"],
                          sagemaker_session=sagemaker_session)
    else:
        print(model_path)
        sklearn = SKLearn(entry_point=model_path,
                          train_instance_type=instance,
                          role=keys["ROLE"],
                          sagemaker_session=sagemaker_session,
                          hyperparameters=hyperparms)

    ## Data for training
    inputs = sagemaker_session.upload_data(path='tmp/train/embeddings',
                                           key_prefix=key_bucket,
                                           bucket=keys["BUCKET_NAME"])
    ## Training the model
    sklearn.fit({'train': inputs})
    ## Deploying the model
    try:
        predictor = sklearn.deploy(initial_instance_count=instance_count,
                                   instance_type=instance,
                                   endpoint_name=keys["ENDPOINT_NAME"],
                                   update_endpoint=update)
    except Exception:
        print("The model was not deployed")
    else:
        print("Endpoint updated: {}".format(keys["ENDPOINT_NAME"]))
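A hedged sketch of the credentials file this function expects, inferred from the keys it reads; every value below is a placeholder:

import json

creds = {
    "AWS_ACCESS_KEY_ID": "AKIA...",        # placeholder
    "AWS_SECRET_ACCESS_KEY": "secret...",  # placeholder
    "REGION_NAME": "us-east-1",
    "ROLE": "arn:aws:iam::123456789012:role/SageMakerRole",  # placeholder ARN
    "BUCKET_NAME": "my-bucket",
    "ENDPOINT_NAME": "svm-endpoint",
}
with open("keys.json", "w") as f:
    json.dump(creds, f)

train_deploy_model("keys.json", hyperparms={"C": 1.0})  # hypothetical SVM hyperparameter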
Example #11
import os

import boto3
import sagemaker
from sagemaker.sklearn.estimator import SKLearn

# parse_train_args() and get_sm_execution_role() are assumed to be
# project-local helpers.

if __name__ == '__main__':
    args = parse_train_args()

    sm_boto3 = boto3.client('sagemaker')
    sess = sagemaker.Session()
    region = sess.boto_session.region_name
    bucket = sess.default_bucket()  # this could also be a hard-coded bucket name
    print('Using bucket ' + bucket)
    sm_role = get_sm_execution_role(False, region)

    fdir = os.path.abspath(os.path.dirname(__file__))

    sklearn_preprocessor = SKLearn(
        entry_point='train_preproc.py',
        source_dir=fdir,
        role=sm_role,
        train_instance_type="ml.c4.xlarge",
        base_job_name='preproc-scikit')

    prefix = 'inference-pipeline-scikit-linearlearner'

    # curl -O https://s3-us-west-2.amazonaws.com/sparkml-mleap/data/abalone/abalone.csv
    # train_input = sess.upload_data(
    #     path=os.path.join(fdir, 'abalone.csv'), 
    #     bucket=bucket,
    #     key_prefix='{}/{}'.format(prefix, 'train'))
    train_input = args.train_s3_path
    # There is no need to validate models for pre-processing, so no SM_CHANNEL_TEST.
    sklearn_preprocessor.fit({'train': train_input})
    print(f'train input on S3 - {train_input}')
    # https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/estimator.py#L724-L741
    print(f'SKlearn preprocessor trained model uploaded to - {sklearn_preprocessor.model_data}')
Example #12
import boto3
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn

# The estimator construction is truncated in the source; 'script.py' below is
# a placeholder entry point, not the original value. trainpath and testpath
# are assumed S3 input paths.
sklearn_estimator = SKLearn(entry_point='script.py',
                            role=get_execution_role(),
                            train_instance_count=1,
                            train_instance_type='ml.m4.xlarge',
                            framework_version='0.20.0',
                            metric_definitions=[{
                                'Name': 'median-AE',
                                'Regex': "AE-at-50th-percentile: ([0-9.]+).*$"
                            }],
                            hyperparameters={
                                'n-estimators': 100,
                                'min-samples-leaf': 2,
                                'target': 'churn'
                            })
sklearn_estimator.fit({'train': trainpath, 'test': testpath}, wait=True)

# And now we are ready to host the model


sm_boto3 = boto3.client('sagemaker')
artifact = sm_boto3.describe_training_job(
    TrainingJobName=sklearn_estimator.latest_training_job.name
)['ModelArtifacts']['S3ModelArtifacts']

print('Model artifact persisted at ' + artifact)


from sagemaker.sklearn.model import SKLearnModel
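The snippet stops at the import; a hedged sketch of the hosting step it leads into, reusing the artifact retrieved above, with the inference script as a placeholder:

model = SKLearnModel(
    model_data=artifact,
    role=get_execution_role(),
    entry_point='script.py',  # hypothetical inference script
    framework_version='0.20.0',
)
predictor = model.deploy(initial_instance_count=1, instance_type='ml.t2.medium')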
Example #13
# TESTING: Confirm that data is in S3 bucket
# empty_check = []
# for obj in boto3.resource('s3').Bucket(bucket).objects.all():
#     empty_check.append(obj.key)
#     print(obj.key)

# assert len(empty_check) !=0, 'S3 bucket is empty.'
# print('Test passed!')

# bucket, prefix, role, sagemaker_session, and input_data are assumed to be
# defined earlier in the notebook.

# Specify an output path
output_path = 's3://{}/{}'.format(bucket, prefix)

estimator = SKLearn(
    entry_point='train.py',
    source_dir='src',
    role=role,
    framework_version="0.23-1",
    py_version="py3",
    instance_count=1,
    instance_type='ml.c4.xlarge',
    sagemaker_session=sagemaker_session,
    output_path=output_path,
)

# Train your estimator on S3 training data
estimator.fit({'train': input_data})

# deploy your model to create a predictor
predictor = estimator.deploy(initial_instance_count=1,
                             instance_type='ml.t2.medium')
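A hedged cleanup step once the predictor is no longer needed, so the endpoint does not keep accruing charges:

predictor.delete_endpoint()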
Example #14
# -*- coding: utf-8 -*-

# Train and deploy the model
from sagemaker.sklearn.estimator import SKLearn

role = 'SageMakerFullAccess_sklearn_api_test'

# Create the SKLearn Object by directing it to the aws_sklearn_main.py script
aws_sklearn = SKLearn(entry_point='aws_sklearn_main.py',
                      train_instance_type='ml.m4.xlarge',
                      role=role)

# Train the model by passing the path to the S3 bucket with the training data
aws_sklearn.fit({'train': 's3://replace-with-your-bucket-name/'})

# Deploy model
aws_sklearn_predictor = aws_sklearn.deploy(instance_type='ml.t2.medium',
                                           initial_instance_count=1)

# Print the endpoint to test in next step
print(aws_sklearn_predictor.endpoint)

# Uncomment and run to terminate the endpoint after you are finished
#aws_sklearn_predictor.delete_endpoint()
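The comment above defers testing to the next step; a hedged sketch of such a test with the low-level runtime client, where the payload format depends on the inference script:

import boto3

runtime = boto3.client('sagemaker-runtime')
response = runtime.invoke_endpoint(
    EndpointName=aws_sklearn_predictor.endpoint,
    ContentType='text/csv',
    Body=b'1.0,2.0,3.0',  # placeholder CSV record
)
print(response['Body'].read())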
Example #15
import os
from inspect import getsourcefile
from os.path import abspath

import config
from sagemaker.sklearn.estimator import SKLearn

# Get the working path of the script
p = abspath(getsourcefile(lambda: 0))
p = p.rsplit('/', 1)[0]
os.chdir(p)
print('Working Directory is: %s' % os.getcwd())

model_name = 'rf'

FRAMEWORK_VERSION = '0.23-1'  # framework version
role = config.aws_role  # get execution role
aws_sklearn = SKLearn(
    # Change the script name to train a different model.
    entry_point=p + '/model_scripts_aws/' + model_name + '.py',
    # instance_type/instance_count are the SDK v2 argument names required by
    # framework_version 0.23-1 (train_instance_type was the v1 name).
    instance_type='ml.m4.2xlarge',
    instance_count=1,
    framework_version=FRAMEWORK_VERSION,
    base_job_name=config.job_name + model_name,  # change for any name
    role=role,
    # source_dir='./',
    # requirements_file='requirements.txt'
)

# Send the model to train asynchronously; wait=False returns immediately.
aws_sklearn.fit({'train': config.train_path,
                 'test': config.test_path},
                wait=False)
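Because fit() was called with wait=False, the job runs asynchronously; a hedged sketch of checking on it later:

job = aws_sklearn.latest_training_job
print('Started training job:', job.name)
job.wait(logs=False)  # block until the job finishes without streaming logs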
Example #16
    "sagemaker_enable_cloudwatch_metrics": "false",
    "sagemaker_job_name": "\"{}\"".format(job_name),
    "sagemaker_program": "\"{}\"".format("train.py"),
    "sagemaker_region": "\"{}\"".format("us-east-1")
}

sklearn = SKLearn(
    base_job_name=job_name,
    image_name='118104210923.dkr.ecr.us-east-1.amazonaws.com/scikit-nlp',
    entry_point='train.py',
    source_dir=source_dir,
    train_instance_type="ml.m5.24xlarge",
    output_path='s3://mctestraaa-pipeline-data/model/',
    hyperparameters=hyperparameters,
    role=role)
sklearn.fit({'train': train_input})

model_params = sklearn.create_model()

config_data_qa = {
    "Parameters": {
        "Environment": "qa",
        "ModelData": model_params.model_data,
        "ModelName": model_params.name,
        "SageMakerRole": model_params.role,
        "StackName": stack_name,
        "SourceDirectory": model_params.source_dir
    }
}

config_data_prod = {
Example #17
    framework_version=FRAMEWORK_VERSION,
    instance_type="ml.c4.xlarge",
    role=SageMakerRole,
    sagemaker_session=sagemaker_session,
    hyperparameters={'max_leaf_nodes': 30})

sklearn_estimator_random_model = SKLearn(entry_point=sklearn_path_random_model,
                                         framework_version=FRAMEWORK_VERSION,
                                         instance_type="ml.c4.xlarge",
                                         role=SageMakerRole,
                                         sagemaker_session=sagemaker_session)

# This will start a SageMaker training job that will download the
# data for us, invoke our scikit-learn code (in the provided script
# file), and save any model artifacts that the script creates.
sklearn_estimator_iris.fit({'train': train_input_iris})
# Always provide the directory of the S3 training/testing data; it is parsed
# for training.
sklearn_estimator_breast_cancer.fit({'train': train_input_breast_cancer})

sklearn_estimator_random_model.fit({
    'train': train_input_breast_cancer,
    'test': test_input_breast_cancer
})

# Deploy the trained iris model to make inference requests
predictor_iris = sklearn_estimator_iris.deploy(initial_instance_count=1,
                                               instance_type="ml.m5.xlarge")

import itertools
import pandas as pd
Example #18
"""
NOTE: You cannot execute this file as-is; it requires AWS credentials.
"""
import json
import boto3
from sagemaker.sklearn.estimator import SKLearn

if __name__ == '__main__':
    role = '<Enter role>'
    aws_sklearn = SKLearn(entry_point='aws_main.py',
                          # instance_type/instance_count are the SDK v2 names
                          # required by framework_version 0.23-1.
                          instance_type='ml.m4.xlarge',
                          instance_count=1,
                          role=role,
                          framework_version="0.23-1",
                          py_version="py3")

    aws_sklearn.fit({'train': 's3://mymlflowbucket/testdata.csv'})

    aws_sklearn_predictor = aws_sklearn.deploy(instance_type='ml.m4.xlarge',
                                               initial_instance_count=1)

    print(aws_sklearn_predictor.endpoint)

    # Testing
    runtime = boto3.client('sagemaker-runtime')

    input = {
        'features': [{
            'product': 1704,
            'amount': 1.0,
            'price': 50.748000000000005,
            'unit': -1,