class SagemakerRegression:
    """Drive a SageMaker scikit-learn training job for a regression script.

    Creating an instance stores the inputs, obtains a SageMaker session via
    ``get_sagemaker_session`` and builds the ``SKLearn`` estimator. ``fit``
    then submits the training job without waiting for it to finish.
    """

    def __init__(self, script_path, data_path, hyperparameters):
        """Remember the inputs and prepare the session and estimator.

        Args:
            script_path: Passed to ``SKLearn`` as its first positional argument.
            data_path: Location supplied to ``fit`` as the ``train`` channel.
            hyperparameters: Forwarded to the estimator unchanged.
        """
        self.script_path = script_path
        self.data_path = data_path
        self.hyperparameters = hyperparameters
        # The estimator is built on top of the session, so the session comes first.
        self.create_sagemaker_session()
        self.create_sklearn_estimator()

    def create_sagemaker_session(self):
        """Store the session returned by ``get_sagemaker_session``."""
        self.sagemaker_session = get_sagemaker_session()

    def create_sklearn_estimator(self):
        """Build the ``SKLearn`` estimator and keep it on ``self.estimator``."""
        # Each regex captures the value printed between "=" and ";".
        mse_metrics = [
            {"Name": "train:mse", "Regex": "Train_mse=(.*?);"},
            {"Name": "test:mse", "Regex": "Test_mse=(.*?);"},
        ]
        self.estimator = SKLearn(
            self.script_path,
            instance_type="ml.m4.xlarge",
            framework_version="0.20.0",
            sagemaker_session=self.sagemaker_session,
            role=aws_role,
            metric_definitions=mse_metrics,
            hyperparameters=self.hyperparameters,
        )

    def fit(self):
        """Submit the training job on the ``train`` channel without blocking."""
        channels = {"train": self.data_path}
        self.estimator.fit(channels, wait=False)

    def get_training_name(self):
        """Return the job name of the estimator's latest training job."""
        latest_job = self.estimator.latest_training_job
        return latest_job.job_name
def train_model_sagemaker(X_train_path: str, sklearn_estimator_kwargs: Dict[str, Any]) -> str:
    """Run a blocking SageMaker training job and report where the model landed.

    Args:
        X_train_path: Full S3 path to the `X_train` dataset.
        sklearn_estimator_kwargs: Keyword arguments used to instantiate
            the SKLearn estimator.

    Returns:
        Full S3 path to the `model.tar.gz` file holding the model artifact.
    """
    # The training channel is the directory that holds X_train; the target
    # variable (y_train) is expected to sit next to it.
    parent_dir = X_train_path.rsplit("/", maxsplit=1)[0]

    estimator = SKLearn(**sklearn_estimator_kwargs)
    # wait=True blocks until the SageMaker job has finished.
    estimator.fit(inputs={"train": parent_dir}, wait=True)

    description = estimator.latest_training_job.describe()
    return description["ModelArtifacts"]["S3ModelArtifacts"]
def test_github_with_ssh_passphrase_not_configured(
    sagemaker_local_session, sklearn_latest_version, sklearn_latest_py_version
):
    """Check that fitting from the private SSH repository fails with a git error.

    ``git_config`` names only the repo, branch and commit (no credentials), and
    ``fit`` is expected to raise ``subprocess.CalledProcessError``.

    Args:
        sagemaker_local_session: Session passed to the estimator.
        sklearn_latest_version: Used as the estimator's ``framework_version``.
        sklearn_latest_py_version: Used as the estimator's ``py_version``.
    """
    script_path = "mnist.py"
    data_path = os.path.join(DATA_DIR, "sklearn_mnist")
    git_config = {
        "repo": PRIVATE_GIT_REPO_2FA_SSH,
        "branch": PRIVATE_BRANCH_2FA,
        "commit": PRIVATE_COMMIT_2FA,
    }
    source_dir = "sklearn"

    sklearn = SKLearn(
        entry_point=script_path,
        role="SageMakerRole",
        source_dir=source_dir,
        instance_count=1,
        instance_type="local",
        sagemaker_session=sagemaker_local_session,
        framework_version=sklearn_latest_version,
        py_version=sklearn_latest_py_version,
        hyperparameters={"epochs": 1},
        git_config=git_config,
    )
    train_input = "file://" + os.path.join(data_path, "train")
    test_input = "file://" + os.path.join(data_path, "test")

    with pytest.raises(subprocess.CalledProcessError) as error:
        sklearn.fit({"train": train_input, "test": test_input})

    # Inspect the raised exception itself: ``error`` is pytest's ExceptionInfo
    # wrapper, whose string form is not the exception message. The assertion
    # sits after the ``with`` block so that it actually runs once fit() raised.
    assert "returned non-zero exit status" in str(error.value)
def cloud():
    """Train ``train.py`` from ``./src/`` on one ``ml.c4.xlarge`` instance.

    Reads the module-level ``role``, ``sagemaker_session`` and ``train_path``;
    ``train_path`` is supplied to ``fit`` as the ``train`` channel.
    """
    sklearn = SKLearn(
        entry_point='train.py',
        source_dir='./src/',
        framework_version="0.23-1",
        py_version="py3",
        instance_count=1,
        instance_type='ml.c4.xlarge',
        # The estimator takes the IAM role through ``role``; the previous
        # ``sagemaker_role`` keyword is not an estimator parameter.
        role=role,
        sagemaker_session=sagemaker_session,
    )
    sklearn.fit({'train': train_path})
def test_private_github_with_2fa(
    sagemaker_local_session, sklearn_latest_version, sklearn_latest_py_version
):
    """Train from the private 2FA repository in local mode, then deploy and predict.

    After ``fit`` the test checks that ``sklearn.source_dir`` exists as a
    directory, builds an ``SKLearnModel`` from the training job's model
    artifact, deploys it locally and asserts that a prediction is returned.
    The endpoint is deleted afterwards if it was created.

    Args:
        sagemaker_local_session: Session used for training, describing the job
            and hosting the model.
        sklearn_latest_version: Used as ``framework_version``.
        sklearn_latest_py_version: Used as the estimator's ``py_version``.
    """
    script_path = "mnist.py"
    data_path = os.path.join(DATA_DIR, "sklearn_mnist")
    git_config = {
        "repo": PRIVATE_GIT_REPO_2FA,
        "branch": PRIVATE_BRANCH_2FA,
        "commit": PRIVATE_COMMIT_2FA,
        "2FA_enabled": True,
        "token": "",  # TODO: find a secure approach
    }
    source_dir = "sklearn"

    sklearn = SKLearn(
        entry_point=script_path,
        role="SageMakerRole",
        source_dir=source_dir,
        py_version=sklearn_latest_py_version,
        instance_count=1,
        instance_type="local",
        sagemaker_session=sagemaker_local_session,
        framework_version=sklearn_latest_version,
        hyperparameters={"epochs": 1},
        git_config=git_config,
    )
    train_input = "file://" + os.path.join(data_path, "train")
    test_input = "file://" + os.path.join(data_path, "test")
    sklearn.fit({"train": train_input, "test": test_input})

    assert os.path.isdir(sklearn.source_dir)

    # Stays None until deploy() succeeds, so the cleanup below cannot raise
    # NameError and hide the real failure when an earlier step breaks.
    predictor = None
    with lock.lock(LOCK_PATH):
        try:
            client = sagemaker_local_session.sagemaker_client
            desc = client.describe_training_job(
                TrainingJobName=sklearn.latest_training_job.name
            )
            model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
            model = SKLearnModel(
                model_data,
                "SageMakerRole",
                entry_point=script_path,
                framework_version=sklearn_latest_version,
                source_dir=source_dir,
                sagemaker_session=sagemaker_local_session,
                git_config=git_config,
            )
            predictor = model.deploy(1, "local")

            data = numpy.zeros((100, 784), dtype="float32")
            result = predictor.predict(data)
            assert result is not None
        finally:
            if predictor is not None:
                predictor.delete_endpoint()
def cloud():
    """Train ``train.py`` from ``./src/`` on one ``ml.c4.xlarge`` instance.

    Uses the module-level ``role``, ``sagemaker_session`` and ``train_path``;
    ``train_path`` becomes the ``train`` channel of the job.
    """
    # Hyperparameters handed to the training script.
    script_params = {'max_depth': 5, 'n_estimators': 10}
    estimator = SKLearn(
        entry_point='train.py',
        source_dir='./src/',
        role=role,
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        sagemaker_session=sagemaker_session,
        hyperparameters=script_params,
    )
    estimator.fit({'train': train_path})
def local():
    """Train, deploy and score the model in SageMaker local mode.

    Trains on ``file://models/train.csv``, deploys a local endpoint, predicts
    on ``./models/test.csv`` (first column is the label, the remaining columns
    are the features) and prints the accuracy score.
    """
    script_params = {'max_depth': 5, 'n_estimators': 10}
    estimator = SKLearn(
        entry_point='train.py',
        source_dir='./src/',
        role=role,
        train_instance_count=1,
        train_instance_type='local',
        hyperparameters=script_params,
    )
    estimator.fit({'train': 'file://models/train.csv'})
    endpoint = estimator.deploy(initial_instance_count=1, instance_type='local')

    # The CSV has no header row; column 0 is the target.
    frame = pd.read_csv('./models/test.csv', header=None, names=None)
    labels = frame.iloc[:, 0]
    features = frame.iloc[:, 1:]

    predictions = endpoint.predict(features)
    accuracy = accuracy_score(labels, predictions)
    print('The current accuracy score for the prediction', accuracy)
def test_training_script_in_local_container(inspectlocal):
    """Run the training script through a local-mode ``SKLearn`` estimator.

    Args:
        inspectlocal: When truthy, the ``inspect`` hyperparameter is ``True``;
            otherwise it is ``None``.
    """
    code_path = "../../src/mlmax/train.py"
    train_data_path = "opt/ml/processing/train/"
    test_data_path = "opt/ml/processing/test/"

    inspect_flag = None
    if inspectlocal:
        inspect_flag = True

    estimator = SKLearn(
        entry_point=code_path,
        role=role,
        py_version="py3",
        framework_version="0.20.0",
        instance_type="local",
        hyperparameters={"inspect": inspect_flag},
    )

    # Both channels are read from the local file system.
    channels = {
        "train": "file://" + train_data_path,
        "test": "file://" + test_data_path,
    }
    estimator.fit(channels, wait=True)
def train():
    """Train the model on SageMaker and deploy it to an endpoint.

    Returns:
        The string ``'success'`` when training and deployment both complete;
        otherwise the exception that was raised (returned, not re-raised).
    """
    try:
        # Estimator that runs our training script from the source directory.
        estimator = SKLearn(
            entry_point=TRAIN_SCRIPT,
            source_dir=SOURCE,
            train_instance_type='ml.m4.xlarge',
            role=ROLE,
        )
        # Train using the data in the 'train' channel.
        estimator.fit({'train': TRAIN_DATA})
        # Host the trained model behind an endpoint.
        estimator.deploy(instance_type='ml.m4.xlarge', initial_instance_count=1)
    except Exception as err:
        return err
    return 'success'
def train_deploy_model(
        keys,
        instance='ml.m4.xlarge',  # Don't change this!
        instance_count=1,  # Don't change this!
        model_path='tmp/model/model.py',
        key_bucket='tmp/train/embeddings',  # It was: tmp/data/data.pickle.
        update=True,  # This should always be true if there is an open endpoint
        hyperparms=None):
    """Train a SageMaker scikit-learn model and deploy it to an endpoint.

    Args:
        keys (str): Path to a JSON file with the credentials and settings. The
            keys read are ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY``,
            ``REGION_NAME``, ``ROLE``, ``BUCKET_NAME`` and ``ENDPOINT_NAME``.
        instance (str): Instance type used to train the model and to deploy it.
        instance_count (int): Initial instance count for the deployment.
        model_path (str): Path of the training script used as entry point.
        key_bucket (str): S3 key prefix the training data is uploaded under.
            The local directory uploaded is always ``tmp/train/embeddings``.
        update (bool): Passed to ``deploy`` as ``update_endpoint``.
        hyperparms (dict): Hyperparameters for the training script; omitted
            from the estimator when empty or ``None``.

    Returns:
        None. Progress and the outcome are printed.
    """
    with open(keys) as key_file:
        keys = json.load(key_file)

    session = boto3.session.Session(
        aws_access_key_id=keys["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=keys["AWS_SECRET_ACCESS_KEY"],
        region_name=keys["REGION_NAME"])
    sagemaker_session = sagemaker.Session(boto_session=session)

    # One construction path; hyperparameters are only passed when provided.
    estimator_kwargs = {
        "entry_point": model_path,
        "train_instance_type": instance,
        "role": keys["ROLE"],
        "sagemaker_session": sagemaker_session,
    }
    if hyperparms:
        estimator_kwargs["hyperparameters"] = hyperparms
    print(model_path)
    sklearn = SKLearn(**estimator_kwargs)

    # Data for training (the local source directory is hard-coded).
    inputs = sagemaker_session.upload_data(path='tmp/train/embeddings',
                                           key_prefix=key_bucket,
                                           bucket=keys["BUCKET_NAME"])

    # Training the model
    sklearn.fit({'train': inputs})

    # Deploying the model. Deployment stays best-effort, but only ordinary
    # errors are caught (not KeyboardInterrupt/SystemExit) and the reason is
    # reported instead of being discarded.
    try:
        sklearn.deploy(initial_instance_count=instance_count,
                       instance_type=instance,
                       endpoint_name=keys["ENDPOINT_NAME"],
                       update_endpoint=update)
    except Exception as exc:
        print("The model was not deployed")
        print("Reason: {}".format(exc))
        return

    print("Endpoint updated: {}".format(keys["ENDPOINT_NAME"]))
# Command-line arguments plus the SageMaker / boto3 handles used by this script.
args = parse_train_args()
sm_boto3 = boto3.client('sagemaker')
sess = sagemaker.Session()
region = sess.boto_session.region_name

# The session's default bucket; a hard-coded bucket name would work as well.
bucket = sess.default_bucket()
print('Using bucket ' + bucket)

sm_role = get_sm_execution_role(False, region)
fdir = os.path.abspath(os.path.dirname(__file__))
prefix = 'inference-pipeline-scikit-linearlearner'

# Estimator that runs the preprocessing script located next to this file.
sklearn_preprocessor = SKLearn(
    entry_point='train_preproc.py',
    source_dir=fdir,
    role=sm_role,
    train_instance_type="ml.c4.xlarge",
    base_job_name='preproc-scikit',
)

# Alternative: download the sample data set and upload it under the prefix.
# curl -O https://s3-us-west-2.amazonaws.com/sparkml-mleap/data/abalone/abalone.csv
# train_input = sess.upload_data(
#     path=os.path.join(fdir, 'abalone.csv'),
#     bucket=bucket,
#     key_prefix='{}/{}'.format(prefix, 'train'))
train_input = args.train_s3_path

# Pre-processing needs no model validation, hence no SM_CHANNEL_TEST channel.
sklearn_preprocessor.fit({'train': train_input})
print(f'train input on S3 - {train_input}')

# See https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/estimator.py#L724-L741
print(f'SKlearn preprocessor trained model uploaded to - {sklearn_preprocessor.model_data}')
role=get_execution_role(), train_instance_count=1, train_instance_type='ml.m4.xlarge', framework_version='0.20.0', metric_definitions=[{ 'Name': 'median-AE', 'Regex': "AE-at-50th-percentile: ([0-9.]+).*$" }], hyperparameters={ 'n-estimators': 100, 'min-samples-leaf': 2, 'target': 'churn' }) sklearn_estimator.fit({'train': trainpath, 'test': testpath}, wait=True) # And now we are ready to host the model # In[ ]: sm_boto3 = boto3.client('sagemaker') artifact = sm_boto3.describe_training_job( TrainingJobName=sklearn_estimator.latest_training_job.name )['ModelArtifacts']['S3ModelArtifacts'] print('Model artifact persisted at ' + artifact) # In[ ]: from sagemaker.sklearn.model import SKLearnModel
# TESTING: optional sanity check that the S3 bucket actually holds data.
# empty_check = []
# for obj in boto3.resource('s3').Bucket(bucket).objects.all():
#     empty_check.append(obj.key)
#     print(obj.key)
# assert len(empty_check) !=0, 'S3 bucket is empty.'
# print('Test passed!')

# S3 location given to the estimator as its output path.
output_path = 's3://{}/{}'.format(bucket, prefix)

estimator = SKLearn(
    entry_point='train.py',
    source_dir='src',
    role=role,
    sagemaker_session=sagemaker_session,
    framework_version="0.23-1",
    py_version="py3",
    instance_count=1,
    instance_type='ml.c4.xlarge',
    output_path=output_path,
)

# Train on the S3 data supplied as the 'train' channel.
estimator.fit({'train': input_data})

# Deploy the trained model; the result is the predictor for the new endpoint.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
)
# -*- coding: utf-8 -*-
# Train the model on SageMaker and deploy it to an endpoint.
from sagemaker.sklearn.estimator import SKLearn

role = 'SageMakerFullAccess_sklearn_api_test'

# The estimator runs the aws_sklearn_main.py script as its entry point.
aws_sklearn = SKLearn(
    entry_point='aws_sklearn_main.py',
    train_instance_type='ml.m4.xlarge',
    role=role,
)

# Train, using the S3 bucket that holds the training data as the 'train' channel.
aws_sklearn.fit({'train': 's3://replace-with-your-bucket-name/'})

# Deploy the trained model.
aws_sklearn_predictor = aws_sklearn.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
)

# Print the endpoint so it can be tested in the next step.
print(aws_sklearn_predictor.endpoint)

# Uncomment and run to terminate the endpoint once you are finished.
#aws_sklearn_predictor.delete_endpoint()
import config

# Directory containing this script; it becomes the working directory.
p = abspath(getsourcefile(lambda: 0)).rsplit('/', 1)[0]
os.chdir(p)
print('Working Directory is: %s' % os.getcwd())

model_name = 'rf'
FRAMEWORK_VERSION = '0.23-1'  # framework version
role = config.aws_role  # execution role taken from the config module

aws_sklearn = SKLearn(
    # Change the script name to train a different model.
    entry_point=p + '/model_scripts_aws/' + model_name + '.py',
    role=role,
    framework_version=FRAMEWORK_VERSION,
    train_instance_type='ml.m4.2xlarge',
    # Any job name prefix works here.
    base_job_name=config.job_name + model_name,
    # source_dir='./',
    # requirements_file='requirements.txt'
)

# Submit the training job without waiting for it to finish.
aws_sklearn.fit(
    {'train': config.train_path, 'test': config.test_path},
    wait=False,
)
"sagemaker_enable_cloudwatch_metrics": "false", "sagemaker_job_name": "\"{}\"".format(job_name), "sagemaker_program": "\"{}\"".format("train.py"), "sagemaker_region": "\"{}\"".format("us-east-1") } sklearn = SKLearn( base_job_name=job_name, image_name='118104210923.dkr.ecr.us-east-1.amazonaws.com/scikit-nlp', entry_point='train.py', source_dir=source_dir, train_instance_type="ml.m5.24xlarge", output_path='s3://mctestraaa-pipeline-data/model/', hyperparameters=hyperparameters, role=role) sklearn.fit({'train': train_input}) model_params = sklearn.create_model() config_data_qa = { "Parameters": { "Environment": "qa", "ModelData": model_params.model_data, "ModelName": model_params.name, "SageMakerRole": model_params.role, "StackName": stack_name, "SourceDirectory": model_params.source_dir } } config_data_prod = {
framework_version=FRAMEWORK_VERSION, instance_type="ml.c4.xlarge", role=SageMakerRole, sagemaker_session=sagemaker_session, hyperparameters={'max_leaf_nodes': 30}) sklearn_estimator_random_model = SKLearn(entry_point=sklearn_path_random_model, framework_version=FRAMEWORK_VERSION, instance_type="ml.c4.xlarge", role=SageMakerRole, sagemaker_session=sagemaker_session) #This will start a SageMaker Training job that will download the # data for us, invoke our scikit-learn code (in the provided script # file), and save any model artifacts that the script creates. sklearn_estimator_iris.fit({'train': train_input_iris}) sklearn_estimator_breast_cancer.fit( {'train': train_input_breast_cancer} ) #always provide directory of s3 training/testing data which are parsed for the training sklearn_estimator_random_model.fit({ 'train': train_input_breast_cancer, 'test': test_input_breast_cancer }) #Deploy the trained iris model to make inference requests predictor_iris = sklearn_estimator_iris.deploy(initial_instance_count=1, instance_type="ml.m5.xlarge") import itertools import pandas as pd
""" NOTE: You can not execute this file as it required AWS creds """ import json import boto3 from sagemaker.sklearn.estimator import SKLearn if __name__ == '__main__': role = '<Enter role>' aws_sklearn = SKLearn(entry_point='aws_main.py', train_instance_type='ml.m4.xlarge', role=role, framework_version="0.23-1", py_version="py3") aws_sklearn.fit({'train': 's3://mymlflowbucket/testdata.csv'}) aws_sklearn_predictor = aws_sklearn.deploy(instance_type='ml.m4.xlarge', initial_instance_count=1) print(aws_sklearn_predictor.endpoint) # Testing runtime = boto3.client('sagemaker-runtime') input = { 'features': [{ 'product': 1704, 'amount': 1.0, 'price': 50.748000000000005, 'unit': -1,