def local():
    """Train, deploy and evaluate the scikit-learn model in SageMaker local mode.

    Fits an ``SKLearn`` estimator on ``file://models/train.csv`` using the
    ``train.py`` entry point under ``./src/``, deploys it with
    ``instance_type='local'``, predicts on ``./models/test.csv`` and prints
    the resulting accuracy score.

    The test CSV is read without a header row; column 0 is used as the true
    label and the remaining columns are sent to the predictor as features.

    The endpoint is deleted before returning (even when evaluation fails):
    the predictor never leaves this function, so no caller could clean it
    up afterwards.

    Returns:
        None. The accuracy is reported with ``print``.
    """
    sklearn = SKLearn(
        entry_point='train.py',
        source_dir='./src/',
        role=role,
        train_instance_count=1,
        train_instance_type='local',
        hyperparameters={'max_depth': 5, 'n_estimators': 10},
    )
    sklearn.fit({'train': 'file://models/train.csv'})

    predictor = sklearn.deploy(initial_instance_count=1, instance_type='local')
    try:
        test_data = pd.read_csv('./models/test.csv', header=None, names=None)
        test_y = test_data.iloc[:, 0]
        test_x = test_data.iloc[:, 1:]

        test_y_preds = predictor.predict(test_x)
        accuracy = accuracy_score(test_y, test_y_preds)
        print('The current accuracy score for the prediction', accuracy)
    finally:
        # Release the endpoint created above; nothing else holds a handle to it.
        predictor.delete_endpoint()
def train():
    """Train the scikit-learn model on SageMaker and deploy it to an endpoint.

    Builds an ``SKLearn`` estimator from ``TRAIN_SCRIPT`` / ``SOURCE`` /
    ``ROLE``, fits it on ``TRAIN_DATA`` (as the ``'train'`` channel) and
    deploys the result on a single ``ml.m4.xlarge`` instance.

    Returns:
        The string ``'success'`` when every step completes; otherwise the
        exception instance that was raised. Failures are returned rather
        than re-raised, so callers must inspect the result. The failure is
        also logged with its traceback so it is not lost when the return
        value is ignored.
    """
    import logging

    try:
        # Create a sagemaker.sklearn.SKLearn Estimator
        aws_sklearn = SKLearn(
            entry_point=TRAIN_SCRIPT,
            source_dir=SOURCE,
            train_instance_type='ml.m4.xlarge',
            role=ROLE,
        )

        # Call the fit method on the estimator, which uses our python script
        # to train the model
        aws_sklearn.fit({'train': TRAIN_DATA})

        # Deploy the model created in the previous step and create an endpoint
        aws_sklearn.deploy(instance_type='ml.m4.xlarge',
                           initial_instance_count=1)
    except Exception as e:
        logging.getLogger(__name__).exception(
            'SageMaker training/deployment failed')
        return e
    else:
        return 'success'
# Optional sanity check (disabled): confirm that data is in the S3 bucket.
# empty_check = []
# for obj in boto3.resource('s3').Bucket(bucket).objects.all():
#     empty_check.append(obj.key)
#     print(obj.key)
# assert len(empty_check) !=0, 'S3 bucket is empty.'
# print('Test passed!')

# S3 location (bucket + prefix) handed to the estimator as its output_path.
output_path = 's3://{}/{}'.format(bucket, prefix)

# Estimator configuration, gathered in one place before construction.
estimator_settings = dict(
    entry_point='train.py',
    source_dir='src',
    role=role,
    framework_version="0.23-1",
    py_version="py3",
    instance_count=1,
    instance_type='ml.c4.xlarge',
    sagemaker_session=sagemaker_session,
    output_path=output_path,
)
estimator = SKLearn(**estimator_settings)

# Fit the estimator, exposing input_data as the 'train' channel.
train_channels = {'train': input_data}
estimator.fit(train_channels)

# Deploy the fitted estimator on one ml.t2.medium instance; the returned
# predictor is kept for later use.
predictor = estimator.deploy(
    instance_type='ml.t2.medium',
    initial_instance_count=1,
)
# -*- coding: utf-8 -*-
# Train a scikit-learn model on SageMaker and deploy it to an endpoint.
from sagemaker.sklearn.estimator import SKLearn

role = 'SageMakerFullAccess_sklearn_api_test'

# Build the SKLearn estimator around the aws_sklearn_main.py script.
aws_sklearn = SKLearn(
    entry_point='aws_sklearn_main.py',
    train_instance_type='ml.m4.xlarge',
    role=role,
)

# Train the model by passing the S3 bucket path that holds the training
# data as the 'train' channel.
train_channels = {'train': 's3://replace-with-your-bucket-name/'}
aws_sklearn.fit(train_channels)

# Deploy the trained model on a single ml.t2.medium instance.
aws_sklearn_predictor = aws_sklearn.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
)

# Print the endpoint so it can be tested in the next step.
print(aws_sklearn_predictor.endpoint)

# Uncomment and run to terminate the endpoint after you are finished.
# aws_sklearn_predictor.delete_endpoint()
def train_deploy_model(
        keys,
        instance='ml.m4.xlarge',  # Don't change this!
        instance_count=1,  # Don't change this!
        model_path='tmp/model/model.py',
        # It was: tmp/data/data.pickle. data.pickle is hardcoded inside the
        # function
        key_bucket='tmp/train/embeddings',
        # This should be always true if there is an open endpoint
        update=True,
        hyperparms=None):
    """Train a SageMaker scikit-learn model and deploy it to an endpoint.

    Args:
        keys (str): Path to a JSON file with credentials and settings. The
            entries read are ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY``,
            ``REGION_NAME``, ``ROLE``, ``BUCKET_NAME`` and ``ENDPOINT_NAME``.
        instance (str): Instance type used both to train the model and to
            deploy it.
        instance_count (int): Initial instance count for deploying the model.
        model_path (str): Path of the script passed to the estimator as its
            entry point.
        key_bucket (str): S3 key prefix under which the local
            ``tmp/train/embeddings`` data is uploaded.
        update (bool): Forwarded to ``deploy`` as ``update_endpoint``.
        hyperparms (dict): Hyperparameters forwarded to the estimator. When
            ``None`` or empty, the estimator is built without any.

    Returns:
        None. Progress is reported with ``print``; a failed deployment is
        reported (with its reason) and the function returns early instead
        of raising.
    """
    with open(keys) as k:
        config = json.load(k)

    session = boto3.session.Session(
        aws_access_key_id=config["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=config["AWS_SECRET_ACCESS_KEY"],
        region_name=config["REGION_NAME"])

    # sagemaker_session = sagemaker.local.LocalSession(boto_session=session)
    sagemaker_session = sagemaker.Session(boto_session=session)

    print(model_path)
    estimator_kwargs = {
        'entry_point': model_path,
        'train_instance_type': instance,
        'role': config["ROLE"],
        'sagemaker_session': sagemaker_session,
    }
    # Only forward hyperparameters when some were actually supplied.
    if hyperparms:
        estimator_kwargs['hyperparameters'] = hyperparms
    sklearn = SKLearn(**estimator_kwargs)

    # Data for training
    inputs = sagemaker_session.upload_data(path='tmp/train/embeddings',
                                           key_prefix=key_bucket,
                                           bucket=config["BUCKET_NAME"])

    # Training the model
    sklearn.fit({'train': inputs})

    # Deploying the model. Deployment stays best-effort, but only ordinary
    # errors are absorbed: KeyboardInterrupt/SystemExit still propagate.
    try:
        sklearn.deploy(initial_instance_count=instance_count,
                       instance_type=instance,
                       endpoint_name=config["ENDPOINT_NAME"],
                       update_endpoint=update)
    except Exception as err:
        print("The model was not deployed")
        print("Reason: {!r}".format(err))
        return

    print("Endpoint updated: {}".format(config["ENDPOINT_NAME"]))
""" import json import boto3 from sagemaker.sklearn.estimator import SKLearn if __name__ == '__main__': role = '<Enter role>' aws_sklearn = SKLearn(entry_point='aws_main.py', train_instance_type='ml.m4.xlarge', role=role, framework_version="0.23-1", py_version="py3") aws_sklearn.fit({'train': 's3://mymlflowbucket/testdata.csv'}) aws_sklearn_predictor = aws_sklearn.deploy(instance_type='ml.m4.xlarge', initial_instance_count=1) print(aws_sklearn_predictor.endpoint) # Testing runtime = boto3.client('sagemaker-runtime') input = { 'features': [{ 'product': 1704, 'amount': 1.0, 'price': 50.748000000000005, 'unit': -1, 'tax': 24.0, 'invoiceid': 393, 'bodyid': 0,
# Each fit() call below starts a SageMaker Training job that will download
# the data for us, invoke our scikit-learn code (in the provided script
# file), and save any model artifacts that the script creates.
iris_channels = {'train': train_input_iris}
sklearn_estimator_iris.fit(iris_channels)

breast_cancer_channels = {'train': train_input_breast_cancer}
sklearn_estimator_breast_cancer.fit(breast_cancer_channels)

# Always provide the directory of the S3 training/testing data, which is
# parsed for the training.
random_model_channels = {
    'train': train_input_breast_cancer,
    'test': test_input_breast_cancer,
}
sklearn_estimator_random_model.fit(random_model_channels)

# Deploy the trained iris model to make inference requests.
predictor_iris = sklearn_estimator_iris.deploy(
    instance_type="ml.m5.xlarge",
    initial_instance_count=1,
)

import itertools
import pandas as pd

# The iris CSV is read without a header row.
iris_csv_path = "AWS_Sagemaker/sklearn_sagemaker_deploy/data/iris_data.csv"
shape = pd.read_csv(iris_csv_path, header=None)

# Row positions 40-49 within each 50-row block starting at 0, 50 and 100.
a = list(range(0, 150, 50))
b = list(range(40, 50))
indices = [sum(pair) for pair in itertools.product(a, b)]

# The [:-1] slice leaves out the final position (149).
test_data = shape.iloc[indices[:-1]]
test_y = test_data.iloc[:, 0]   # first column
test_X = test_data.iloc[:, 1:]  # every remaining column