import argparse
import os
from pathlib import Path

import numpy as np
from sagemaker import Session
# Assumed source of the MNIST loader; the original import block is not shown.
from tensorflow.keras.datasets import mnist


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='data/')
    args = parser.parse_args()

    # Download MNIST and cache it locally as .npz archives on the first run.
    data_dir = Path(args.data_dir)
    if not data_dir.exists():
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        data_dir.mkdir()
        np.savez(str(data_dir / 'train'), image=x_train, label=y_train)
        np.savez(str(data_dir / 'test'), image=x_test, label=y_test)

    # Upload the local dataset directory to S3 under dataset/mnist.
    session = Session()
    s3_bucket_name = os.getenv('S3_BUCKET_NAME', session.default_bucket())
    session.upload_data(path=str(data_dir),
                        bucket=s3_bucket_name,
                        key_prefix='dataset/mnist')


if __name__ == '__main__':
    main()
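# --- Hedged usage sketch (not part of the original source) ---
# Quick local check of the archives the script above writes: each .npz file
# stores its arrays under the 'image' and 'label' keys used in np.savez above.
# The 'data/' path matches the script's default --data_dir.
import numpy as np

train = np.load('data/train.npz')
print(train['image'].shape, train['label'].shape)  # e.g. (60000, 28, 28) (60000,)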
import argparse
import os
from datetime import datetime as dt

from sagemaker import Session
from sagemaker.tensorflow import TensorFlow


def main():
    session = Session()
    s3_bucket_name = os.getenv('S3_BUCKET_NAME', session.default_bucket())

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--role', default=os.environ['SAGEMAKER_ROLE'])
    parser.add_argument('--input_data', default=f's3://{s3_bucket_name}/dataset/mnist')
    parser.add_argument('--output_path', default=f's3://{s3_bucket_name}/training')
    parser.add_argument('--train_instance_type', default='ml.m5.large')
    parser.add_argument('--wait', action='store_true')
    # parser.add_argument('--deploy', action='store_true')
    args = parser.parse_args()

    # The dict key becomes the channel name; in script mode the data is exposed
    # to the training container via SM_CHANNEL_DATASET.
    input_data = {'dataset': args.input_data}
    job_name = 'mnist-' + dt.now().strftime('%Y-%m-%d-%H-%M')
    hyperparameters = {'batch_size': args.batch_size, 'epochs': args.epochs}
    # CloudWatch metric definitions scraped from the training log output.
    metric_definitions = [
        {'Name': 'train loss', 'Regex': r'loss: (\S+)'},
        {'Name': 'valid loss', 'Regex': r'val_loss: (\S+)'},
    ]

    estimator = TensorFlow(entry_point='train.py',
                           source_dir='src',
                           role=args.role,
                           train_instance_count=1,
                           train_instance_type=args.train_instance_type,
                           train_volume_size=30,
                           train_max_run=86400,
                           output_path=args.output_path,
                           code_location=args.output_path,
                           py_version='py3',
                           framework_version='1.12.0',
                           hyperparameters=hyperparameters,
                           metric_definitions=metric_definitions)
    estimator.fit(input_data, wait=args.wait, job_name=job_name)


if __name__ == '__main__':
    main()
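# --- Hedged sketch of src/train.py (the entry point referenced above but not shown) ---
# Assumes TensorFlow script mode: hyperparameters arrive as CLI arguments, the
# 'dataset' channel is mounted at SM_CHANNEL_DATASET, and anything written to
# SM_MODEL_DIR is uploaded to the estimator's output_path after training. The
# network architecture below is illustrative only.
import argparse
import os

import numpy as np
import tensorflow as tf


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--model_dir', default=None)  # injected by SageMaker
    args = parser.parse_args()

    data_dir = os.environ.get('SM_CHANNEL_DATASET', 'data/')
    model_dir = os.environ.get('SM_MODEL_DIR', 'model/')

    train = np.load(os.path.join(data_dir, 'train.npz'))
    x_train = train['image'] / 255.0
    y_train = train['label']

    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # The 'loss:' / 'val_loss:' entries in the Keras progress log are what the
    # metric_definitions regexes above pick up.
    model.fit(x_train, y_train,
              batch_size=args.batch_size,
              epochs=args.epochs,
              validation_split=0.1)
    model.save(os.path.join(model_dir, 'mnist_model.h5'))


if __name__ == '__main__':
    main()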
import os
from time import gmtime, strftime

import boto3
from sagemaker import Session
from sagemaker.amazon.amazon_estimator import get_image_uri

# ConnectableComponent, MLCompException, AwsHelper, str2bool and
# JobMonitorTransformer are project-local helpers; their import paths are not
# shown in this snippet.


class SageMakerKMeansBatchPredictorIT(ConnectableComponent):

    def __init__(self, engine):
        super(SageMakerKMeansBatchPredictorIT, self).__init__(engine)
        self._dataset_s3_url = None
        self._bucket_name = None
        self._local_model_filepath = None
        self._model_s3_filepath = None
        self._results_s3_location = None
        self._model_name = None
        self._job_name = None
        self._instance_type = None
        self._instance_count = None
        self._skip_s3_model_uploading = False
        self._sagemaker_session = Session()
        self._sagemaker_client = boto3.client('sagemaker')
        self._aws_helper = AwsHelper(self._logger)
        self._job_monitor = None

    def _materialize(self, parent_data_objs, user_data):
        if not parent_data_objs:
            raise MLCompException("Missing expected dataset S3 url from parent input!")

        if not self._init_params(parent_data_objs):
            return

        self._upload_model_to_s3()
        self._create_model()
        self._create_transformation_job()
        self._monitor_job()
        return [self._predictions_s3_url()]

    def _init_params(self, parent_data_objs):
        self._dataset_s3_url = parent_data_objs[0]

        self._local_model_filepath = self._params['local_model_filepath']
        if not self._local_model_filepath or not os.path.isfile(self._local_model_filepath):
            self._logger.info("Input model is empty! Skipping prediction!")
            return False

        self._bucket_name = self._params.get('bucket_name')
        if not self._bucket_name:
            self._bucket_name = self._sagemaker_session.default_bucket()

        self._model_s3_filepath = self._params.get('model_s3_filepath')

        self._results_s3_location = self._params.get('results_s3_location')
        if not self._results_s3_location:
            bucket_name, _ = AwsHelper.s3_url_parse(self._dataset_s3_url)
            self._results_s3_location = "s3://{}/prediction/results".format(bucket_name)

        self._skip_s3_model_uploading = str2bool(self._params.get('skip_s3_model_uploading'))
        self._instance_type = self._params.get('instance_type', 'ml.m4.xlarge')
        self._instance_count = self._params.get('instance_count', 1)
        return True

    def _upload_model_to_s3(self):
        self._model_s3_filepath = self._aws_helper.upload_file(
            self._local_model_filepath, self._bucket_name,
            self._model_s3_filepath, self._skip_s3_model_uploading)

    def _create_model(self):
        self._model_name = "Kmeans-model-{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
        self._logger.info("Creating SageMaker KMeans model ... {}".format(self._model_name))

        primary_container = {
            'Image': get_image_uri(self._sagemaker_session.boto_region_name, 'kmeans'),
            'ModelDataUrl': self._model_s3_filepath
        }

        create_model_response = self._sagemaker_client.create_model(
            ModelName=self._model_name,
            ExecutionRoleArn=self._ml_engine.iam_role,
            PrimaryContainer=primary_container)

        model_arn = create_model_response['ModelArn']
        self._logger.info("Model created successfully! name: {}, arn: {}".format(
            self._model_name, model_arn))

    def _create_transformation_job(self):
        self._job_name = 'kmeans-batch-prediction-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
        self._logger.info(
            "Setting up transform job, job-name: {}, input-dataset: {}, output-path-root: {}"
            .format(self._job_name, self._dataset_s3_url, self._results_s3_location))

        request = {
            "TransformJobName": self._job_name,
            "ModelName": self._model_name,
            "MaxConcurrentTransforms": 4,
            "MaxPayloadInMB": 6,
            "BatchStrategy": "MultiRecord",
            "TransformOutput": {
                "S3OutputPath": self._results_s3_location
            },
            "TransformInput": {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataType": "S3Prefix",
                        "S3Uri": self._dataset_s3_url
                    }
                },
                "ContentType": "text/csv;label_size=0",
                "SplitType": "Line",
                "CompressionType": "None"
            },
            "TransformResources": {
                "InstanceType": self._instance_type,
                "InstanceCount": self._instance_count
            }
        }

        self._sagemaker_client.create_transform_job(**request)
        self._logger.info("Created transform job with name: {}".format(self._job_name))

    def _monitor_job(self):
        JobMonitorTransformer(self._sagemaker_client, self._job_name, self._logger).monitor()

    def _predictions_s3_url(self):
        _, input_rltv_path = AwsHelper.s3_url_parse(self._dataset_s3_url)
        predictions_s3_url = "{}/{}.out".format(self._results_s3_location, input_rltv_path)
        return predictions_s3_url
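# --- Hedged sketch (not part of the original source) ---
# JobMonitorTransformer is not shown above; a minimal stand-in could poll the
# boto3 SageMaker client until the batch transform job reaches a terminal state.
# The class name and polling interval here are illustrative only.
import time


class SimpleTransformJobMonitor(object):
    def __init__(self, sagemaker_client, job_name, logger, poll_interval_sec=30):
        self._client = sagemaker_client
        self._job_name = job_name
        self._logger = logger
        self._poll_interval_sec = poll_interval_sec

    def monitor(self):
        while True:
            desc = self._client.describe_transform_job(TransformJobName=self._job_name)
            status = desc['TransformJobStatus']
            self._logger.info("Transform job {} status: {}".format(self._job_name, status))
            if status in ('Completed', 'Failed', 'Stopped'):
                if status != 'Completed':
                    raise RuntimeError("Transform job {} ended with status {}: {}".format(
                        self._job_name, status, desc.get('FailureReason', 'unknown')))
                return
            time.sleep(self._poll_interval_sec)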
import logging
import os
from typing import Dict, Optional

from boto3.session import Session as BotoSession
from sagemaker import Session

LOGGER = logging.getLogger(__name__)


class Sagemaker:
    """
    Class to provide AWS-specific execution of the models.

    In the future, we can introduce a superclass that defines the basic methods
    (such as uploading data to the right folder/location, loading models, etc.).
    For now, we only support AWS. This behaves much like a default session object.
    """

    training_instance_count = 1
    training_instance_type = "ml.m4.xlarge"
    transformer_instance_count = 1
    transformer_instance_type = "ml.c4.xlarge"
    deploy_instance_count = 1
    deploy_instance_type = "ml.c4.xlarge"

    def __init__(
        self,
        bucket: Optional[str] = None,
        role: Optional[str] = None,
        prefix: Optional[str] = None,
        default_model_kwargs: Optional[Dict] = None,
        default_transformer_kwargs: Optional[Dict] = None,
        default_deploy_kwargs: Optional[Dict] = None,
    ) -> None:
        """
        Initializes the AWS object.

        Arguments:
            bucket: The bucket name. Defaults to the session's default bucket.
            role: The role to assume. Defaults to the AWS_DEFAULT_ROLE environment variable.
            prefix: The prefix to use in the bucket. Defaults to 'data'.
            default_model_kwargs: Default kwargs for any SageMaker model.
                Defaults to train_instance_type, train_instance_count, role and session.
            default_transformer_kwargs: Default kwargs for any SageMaker transformer.
                Defaults to instance_type, instance_count and role.
            default_deploy_kwargs: Default kwargs for any SageMaker deployment.
                Defaults to instance_type and initial_instance_count.
        """
        LOGGER.info("Initializing Sagemaker executor")
        self.boto_session = BotoSession(
            aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
            region_name="eu-west-1",
        )
        self.region = self.boto_session.region_name
        self.session = Session(boto_session=self.boto_session)
        self.role = role if role is not None else os.environ.get("AWS_DEFAULT_ROLE")
        self.bucket = bucket if bucket is not None else self.session.default_bucket()
        self.prefix = prefix if prefix is not None else "data"
        self.default_model_kwargs = self._default_model_kwargs(
            self.role, self.session, default_model_kwargs)
        self.default_transformer_kwargs = self._default_transformer_kwargs(
            self.role, self.session, default_transformer_kwargs)
        self.default_deploy_kwargs = self._default_deploy_kwargs(
            self.role, self.session, default_deploy_kwargs)

    def _default_model_kwargs(self, role, session, input_default) -> Dict:
        initial = {
            "role": role,
            "sagemaker_session": session,
            "train_instance_count": self.training_instance_count,
            "train_instance_type": self.training_instance_type,
        }
        if input_default is not None:
            initial.update(input_default)
        return initial

    def _default_transformer_kwargs(self, role, session, input_default) -> Dict:
        initial = {
            "role": role,
            "instance_count": self.transformer_instance_count,
            "instance_type": self.transformer_instance_type,
        }
        if input_default is not None:
            initial.update(input_default)
        return initial

    def _default_deploy_kwargs(self, role, session, input_default) -> Dict:
        initial = {
            "initial_instance_count": self.deploy_instance_count,
            "instance_type": self.deploy_instance_type,
        }
        if input_default is not None:
            initial.update(input_default)
        return initial

    def upload_data(
        self,
        local_data_file: str,
        bucket: Optional[str] = None,
        prefix: Optional[str] = None,
    ) -> str:
        """
        Uploads the data from the local data file to S3 and returns the location.

        Arguments:
            local_data_file: The location of the data.
            bucket: The bucket to upload to. Defaults to the instance's bucket.
            prefix: The key prefix to upload under. Defaults to the instance's prefix.

        Returns:
            The S3 data location.
        """
        if bucket is None:
            bucket = self.bucket
        if prefix is None:
            prefix = self.prefix
        LOGGER.info("Uploading data to S3")
        return self.session.upload_data(local_data_file, bucket=bucket, key_prefix=prefix)

    def download_data(
        self,
        file_name: str,
        local_file_directory: str,
        bucket: Optional[str] = None,
        prefix: Optional[str] = None,
    ) -> str:
        """
        Downloads the S3 data and stores it in the local file location.

        Arguments:
            file_name: The name of the file.
            local_file_directory: The directory to store the data in.
            bucket: The bucket to download from. Defaults to the instance's bucket.
            prefix: The key prefix to download from. Defaults to the instance's prefix.

        Returns:
            The local file location.
        """
        s3_client = self.boto_session.client("s3")
        if bucket is None:
            bucket = self.bucket
        if prefix is None:
            prefix = self.prefix
        key = f"{prefix}/{file_name}"
        local_file_name = os.path.join(local_file_directory, file_name)
        LOGGER.info(f"Downloading data from s3://{bucket}/{key} to {local_file_name}")
        if not os.path.exists(local_file_directory):
            os.makedirs(local_file_directory)
        s3_client.download_file(Bucket=bucket, Key=key, Filename=local_file_name)
        return local_file_name
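# --- Hedged usage sketch (not part of the original source) ---
# The bucket name and file names below are placeholders. upload_data returns the
# S3 URI of the uploaded object, and download_data mirrors a key back into a
# local directory; default_model_kwargs can be splatted into a SageMaker
# estimator to avoid repeating role/session/instance settings.
executor = Sagemaker(bucket="my-example-bucket", prefix="data")

# Uploads ./train.csv to s3://my-example-bucket/data/train.csv
train_data_s3 = executor.upload_data("train.csv")

# Downloads s3://my-example-bucket/data/predictions.csv to ./results/predictions.csv
local_predictions = executor.download_data("predictions.csv", "results")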