def main():
    """Prepare a local copy of MNIST and upload it to S3.

    Command line:
        --data_dir: local directory holding the dataset (default ``data/``).

    If ``--data_dir`` does not exist yet, MNIST is loaded through
    ``mnist.load_data()``, the directory is created and the train/test
    splits are written with ``np.savez`` under the keys ``image`` and
    ``label``. The directory is then uploaded with
    ``Session.upload_data`` under the key prefix ``dataset/mnist``.

    The target bucket is taken from the ``S3_BUCKET_NAME`` environment
    variable; when that is unset or empty the session's default bucket is
    used.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='data/')
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    if not data_dir.exists():
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        # parents=True so that a nested --data_dir (e.g. out/mnist/raw) can
        # be created in one go instead of failing on the missing parent.
        data_dir.mkdir(parents=True)
        np.savez(str(data_dir / 'train'), image=x_train, label=y_train)
        np.savez(str(data_dir / 'test'), image=x_test, label=y_test)

    session = Session()
    # Use `or` rather than a getenv() default: a default argument is always
    # evaluated, so default_bucket() would be called (an AWS round trip)
    # even when S3_BUCKET_NAME is already provided.
    s3_bucket_name = os.getenv('S3_BUCKET_NAME') or session.default_bucket()
    session.upload_data(path=str(data_dir),
                        bucket=s3_bucket_name,
                        key_prefix='dataset/mnist')
# Example #2
# 0
def main():
    """Launch a SageMaker TensorFlow training job for MNIST.

    Command line:
        --batch_size: forwarded to the training script as a hyperparameter.
        --epochs: forwarded to the training script as a hyperparameter.
        --role: IAM role for the job. Defaults to the ``SAGEMAKER_ROLE``
            environment variable and is required when that is not set.
        --input_data: S3 location of the dataset, passed to ``fit`` as the
            ``dataset`` channel.
        --output_path: S3 location used both as the estimator's
            ``output_path`` and ``code_location``.
        --train_instance_type: instance type for the training job.
        --wait: block until the job finishes (forwarded to ``fit``).

    The default S3 locations are built from the ``S3_BUCKET_NAME``
    environment variable, falling back to the session's default bucket
    when it is unset or empty.
    """
    session = Session()
    # Use `or` rather than a getenv() default: a default argument is always
    # evaluated, so default_bucket() would be called (an AWS round trip)
    # even when S3_BUCKET_NAME is already provided.
    s3_bucket_name = os.getenv('S3_BUCKET_NAME') or session.default_bucket()

    # Read the role leniently so that passing --role explicitly works even
    # when SAGEMAKER_ROLE is not exported; argparse reports a clean usage
    # error if neither source provides one.
    role_from_env = os.environ.get('SAGEMAKER_ROLE')

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--role',
                        default=role_from_env,
                        required=not role_from_env)
    parser.add_argument('--input_data',
                        default=f's3://{s3_bucket_name}/dataset/mnist')
    parser.add_argument('--output_path',
                        default=f's3://{s3_bucket_name}/training')
    parser.add_argument('--train_instance_type', default='ml.m5.large')
    parser.add_argument('--wait', action='store_true')
    # parser.add_argument('--deploy', action='store_true')
    args = parser.parse_args()

    input_data = {'dataset': args.input_data}
    # Second resolution: with minute resolution two launches inside the
    # same minute would produce the same job name.
    job_name = 'mnist-' + dt.now().strftime('%Y-%m-%d-%H-%M-%S')

    hyperparameters = {'batch_size': args.batch_size, 'epochs': args.epochs}

    # NOTE(review): the 'train loss' pattern also matches the tail of a
    # "val_loss: ..." token; confirm the training log layout keeps the two
    # metrics apart before relying on these values.
    metric_definitions = [
        {
            'Name': 'train loss',
            'Regex': r'loss: (\S+)'
        },
        {
            'Name': 'valid loss',
            'Regex': r'val_loss: (\S+)'
        },
    ]
    estimator = TensorFlow(entry_point='train.py',
                           source_dir='src',
                           role=args.role,
                           train_instance_count=1,
                           train_instance_type=args.train_instance_type,
                           train_volume_size=30,
                           train_max_run=86400,
                           output_path=args.output_path,
                           code_location=args.output_path,
                           py_version='py3',
                           framework_version='1.12.0',
                           hyperparameters=hyperparameters,
                           metric_definitions=metric_definitions)
    estimator.fit(input_data, wait=args.wait, job_name=job_name)
class SageMakerKMeansBatchPredictorIT(ConnectableComponent):
    """Run a SageMaker KMeans batch transform over a dataset stored in S3.

    The component receives the dataset's S3 url from its parent, uploads a
    local model file to S3, registers it as a SageMaker model, creates a
    batch transform job for the dataset, monitors the job through
    ``JobMonitorTransformer`` and returns the S3 url under which the
    predictions are expected.

    ``self._logger``, ``self._params`` and ``self._ml_engine`` are not set
    in this class; presumably they are provided by ``ConnectableComponent``.
    """

    def __init__(self, engine):
        """Set up AWS clients/helpers and declare every per-run attribute."""
        super(SageMakerKMeansBatchPredictorIT, self).__init__(engine)
        self._dataset_s3_url = None
        self._bucket_name = None
        self._local_model_filepath = None
        self._model_s3_filepath = None
        self._results_s3_location = None
        # Filled in by _init_params; declared here so that every attribute
        # exists as soon as the object is constructed.
        self._skip_s3_model_uploading = None
        self._model_name = None
        self._job_name = None
        self._instance_type = None
        self._instance_count = None

        self._sagemaker_session = Session()
        self._sagemaker_client = boto3.client('sagemaker')
        self._aws_helper = AwsHelper(self._logger)
        self._job_monitor = None

    def _materialize(self, parent_data_objs, user_data):
        """Execute the whole batch prediction flow.

        Args:
            parent_data_objs: outputs of the parent component; the first
                item is used as the dataset S3 url.
            user_data: not used by this component.

        Returns:
            A one-element list with the predictions S3 url, or ``None``
            when there is no usable local model file and prediction is
            skipped.

        Raises:
            MLCompException: if ``parent_data_objs`` is empty.
        """
        if not parent_data_objs:
            raise MLCompException(
                "Missing expected dataset S3 url from parent input!")

        if not self._init_params(parent_data_objs):
            return

        self._upload_model_to_s3()
        self._create_model()
        self._create_transformation_job()
        self._monitor_job()
        return [self._predictions_s3_url()]

    def _init_params(self, parent_data_objs):
        """Read the component parameters into instance attributes.

        Returns:
            ``False`` when the configured local model path is empty or is
            not an existing file (the caller then skips prediction),
            ``True`` otherwise.
        """
        self._dataset_s3_url = parent_data_objs[0]

        self._local_model_filepath = self._params['local_model_filepath']
        if not self._local_model_filepath or not os.path.isfile(
                self._local_model_filepath):
            self._logger.info("Input model is empty! Skip prediction!")
            return False

        self._bucket_name = self._params.get('bucket_name')
        if not self._bucket_name:
            self._bucket_name = self._sagemaker_session.default_bucket()

        self._model_s3_filepath = self._params.get('model_s3_filepath')

        self._results_s3_location = self._params.get('results_s3_location')
        if not self._results_s3_location:
            # Default the results location to the bucket that holds the
            # input dataset; only the bucket part of the url is needed.
            bucket_name, _ = AwsHelper.s3_url_parse(self._dataset_s3_url)
            self._results_s3_location = "s3://{}/prediction/results".format(
                bucket_name)

        self._skip_s3_model_uploading = str2bool(
            self._params.get('skip_s3_model_uploading'))

        self._instance_type = self._params.get('instance_type', 'ml.m4.xlarge')
        # The transform job request expects an integer instance count;
        # coerce so that a value configured as "2" is accepted as well.
        self._instance_count = int(self._params.get('instance_count', 1))

        return True

    def _upload_model_to_s3(self):
        """Upload the local model file and remember its resulting S3 path."""
        self._model_s3_filepath = self._aws_helper.upload_file(
            self._local_model_filepath, self._bucket_name,
            self._model_s3_filepath, self._skip_s3_model_uploading)

    def _create_model(self):
        """Register the uploaded model artifact as a SageMaker model.

        The model name is ``Kmeans-model-`` followed by the current UTC
        timestamp (second resolution).
        """
        self._model_name = "Kmeans-model-{}".format(
            strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
        self._logger.info("Creating SageMaker KMeans model ... {}".format(
            self._model_name))

        primary_container = {
            'Image':
            get_image_uri(self._sagemaker_session.boto_region_name, 'kmeans'),
            'ModelDataUrl':
            self._model_s3_filepath
        }

        create_model_response = self._sagemaker_client.create_model(
            ModelName=self._model_name,
            ExecutionRoleArn=self._ml_engine.iam_role,
            PrimaryContainer=primary_container)
        model_arn = create_model_response['ModelArn']
        self._logger.info(
            "Model created successfully! name: {}, arn: {}".format(
                self._model_name, model_arn))

    def _create_transformation_job(self):
        """Create the batch transform job for the dataset.

        The job name is ``kmeans-batch-prediction-`` followed by the
        current UTC timestamp (second resolution). The input is described
        as line-split CSV without a label column.
        """
        self._job_name = 'kmeans-batch-prediction-' + strftime(
            "%Y-%m-%d-%H-%M-%S", gmtime())
        self._logger.info(
            "Setup transform job, job-name: {}, input-dataset: {}, output-path-root:{}"
            .format(self._job_name, self._dataset_s3_url,
                    self._results_s3_location))

        request = {
            "TransformJobName": self._job_name,
            "ModelName": self._model_name,
            "MaxConcurrentTransforms": 4,
            "MaxPayloadInMB": 6,
            "BatchStrategy": "MultiRecord",
            "TransformOutput": {
                "S3OutputPath": self._results_s3_location
            },
            "TransformInput": {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataType": "S3Prefix",
                        "S3Uri": self._dataset_s3_url
                    }
                },
                "ContentType": "text/csv;label_size=0",
                "SplitType": "Line",
                "CompressionType": "None"
            },
            "TransformResources": {
                "InstanceType": self._instance_type,
                "InstanceCount": self._instance_count
            }
        }

        self._sagemaker_client.create_transform_job(**request)
        self._logger.info("Created transform job with name: {}".format(
            self._job_name))

    def _monitor_job(self):
        """Monitor the transform job created by this component."""
        # Keep the monitor on the instance (the attribute is declared in
        # __init__) so it stays reachable after the job has been started.
        self._job_monitor = JobMonitorTransformer(self._sagemaker_client,
                                                  self._job_name,
                                                  self._logger)
        self._job_monitor.monitor()

    def _predictions_s3_url(self):
        """Return ``<results location>/<dataset relative path>.out``."""
        _, input_rltv_path = AwsHelper.s3_url_parse(self._dataset_s3_url)
        predictions_s3_url = "{}/{}.out".format(self._results_s3_location,
                                                input_rltv_path)
        return predictions_s3_url
class Sagemaker:
    """
    AWS SageMaker specific helper for executing models.

    Wraps a boto session and a SageMaker session, keeps the bucket, prefix
    and role to work with, and prepares default keyword arguments for
    models, transformers and deployments.

    In the future, a superclass could define the basic methods (such as
    uploading data to the right folder/location, loading models etc).
    For now, only AWS is supported.
    This is very similar to the default session objects.
    """

    # Instance count/type placed in the default model (training) kwargs.
    training_instance_count = 1
    training_instance_type = "ml.m4.xlarge"
    # Instance count/type placed in the default transformer kwargs.
    transformer_instance_count = 1
    transformer_instance_type = "ml.c4.xlarge"
    # Instance count/type placed in the default deployment kwargs.
    deploy_instance_count = 1
    deploy_instance_type = "ml.c4.xlarge"

    def __init__(
        self,
        bucket: Optional[str] = None,
        role: Optional[str] = None,
        prefix: Optional[str] = None,
        default_model_kwargs: Optional[Dict] = None,
        default_transfomer_kwargs: Optional[Dict] = None,
        default_deploy_kwargs: Optional[Dict] = None,
    ) -> None:
        """
        Create the boto/SageMaker sessions and resolve all defaults.

        The boto session is built for region ``eu-west-1`` with the
        credentials found in the ``AWS_ACCESS_KEY_ID`` and
        ``AWS_SECRET_ACCESS_KEY`` environment variables.

        Arguments:
            bucket: Bucket name. When None, the SageMaker session's default
                bucket is used.
            role: Role to use. When None, read from the ``AWS_DEFAULT_ROLE``
                environment variable.
            prefix: Key prefix inside the bucket. When None, 'data'.
            default_model_kwargs: Overrides merged on top of the default
                model kwargs (role, sagemaker_session, train_instance_count,
                train_instance_type).
            default_transfomer_kwargs: Overrides merged on top of the
                default transformer kwargs (role, instance_count,
                instance_type). Note the spelling of the parameter name.
            default_deploy_kwargs: Overrides merged on top of the default
                deployment kwargs (initial_instance_count, instance_type).
        """
        LOGGER.info("Initializing Sagemaker executor")

        credentials = {
            "aws_access_key_id": os.environ.get("AWS_ACCESS_KEY_ID"),
            "aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESS_KEY"),
        }
        self.boto_session = BotoSession(region_name="eu-west-1", **credentials)
        self.region = self.boto_session.region_name
        self.session = Session(boto_session=self.boto_session)

        # Explicit arguments win; fall back only when they are None.
        if role is None:
            role = os.environ.get("AWS_DEFAULT_ROLE")
        self.role = role

        if bucket is None:
            bucket = self.session.default_bucket()
        self.bucket = bucket

        self.prefix = "data" if prefix is None else prefix

        self.default_model_kwargs = self._default_model_kwargs(
            self.role, self.session, default_model_kwargs)
        self.default_transformer_kwargs = self._default_transformer_kwargs(
            self.role, self.session, default_transfomer_kwargs)
        self.default_deploy_kwargs = self._default_deploy_kwargs(
            self.role, self.session, default_deploy_kwargs)

    def _default_model_kwargs(self, role, session, input_default) -> Dict:
        """
        Build the default kwargs for a sagemaker model.

        Starts from role, sagemaker_session and the class-level training
        instance count/type, then applies ``input_default`` on top when it
        is not None.
        """
        kwargs = dict(
            role=role,
            sagemaker_session=session,
            train_instance_count=self.training_instance_count,
            train_instance_type=self.training_instance_type,
        )
        if input_default is None:
            return kwargs
        kwargs.update(input_default)
        return kwargs

    def _default_transformer_kwargs(self, role, session,
                                    input_default) -> Dict:
        """
        Build the default kwargs for a sagemaker transformer.

        Starts from role and the class-level transformer instance
        count/type, then applies ``input_default`` on top when it is not
        None. ``session`` is accepted but not used here.
        """
        kwargs = {}
        kwargs["role"] = role
        kwargs["instance_count"] = self.transformer_instance_count
        kwargs["instance_type"] = self.transformer_instance_type
        overrides = {} if input_default is None else input_default
        kwargs.update(overrides)
        return kwargs

    def _default_deploy_kwargs(self, role, session, input_default) -> Dict:
        """
        Build the default kwargs for a sagemaker deployment.

        Starts from the class-level deploy instance count/type, then applies
        ``input_default`` on top when it is not None. ``role`` and
        ``session`` are accepted but not used here.
        """
        kwargs = dict(
            initial_instance_count=self.deploy_instance_count,
            instance_type=self.deploy_instance_type,
        )
        if input_default is not None:
            kwargs.update(input_default)
        return kwargs

    def upload_data(
        self,
        local_data_file: str,
        bucket: Optional[str] = None,
        prefix: Optional[str] = None,
    ) -> str:
        """
        Upload local data to S3 through the SageMaker session.

        Arguments:
            local_data_file: Location of the local data to upload.
            bucket: Bucket to upload to. When None, this object's bucket.
            prefix: Key prefix to upload under. When None, this object's
                prefix.

        Returns:
            Whatever the session's ``upload_data`` returns (the s3 data
            location).
        """
        target_bucket = self.bucket if bucket is None else bucket
        target_prefix = self.prefix if prefix is None else prefix
        LOGGER.info("Uploading data to S3")
        s3_location = self.session.upload_data(
            local_data_file,
            bucket=target_bucket,
            key_prefix=target_prefix,
        )
        return s3_location

    def download_data(
        self,
        file_name: str,
        local_file_directory: str,
        bucket: Optional[str] = None,
        prefix: Optional[str] = None,
    ) -> str:
        """
        Download ``<prefix>/<file_name>`` from S3 into a local directory.

        Arguments:
            file_name: Name of the file; used both as the last part of the
                S3 key and as the local file name.
            local_file_directory: Directory to store the file in. It is
                created when it does not exist yet.
            bucket: Bucket to download from. When None, this object's
                bucket.
            prefix: Key prefix to download from. When None, this object's
                prefix.

        Returns:
            The local file location.
        """
        s3_client = self.boto_session.client("s3")
        # Honour the caller's bucket; fall back to the instance default
        # only when none was given (mirrors upload_data).
        if bucket is None:
            bucket = self.bucket
        if prefix is None:
            prefix = self.prefix
        key = f"{prefix}/{file_name}"
        local_file_name = os.path.join(local_file_directory, file_name)
        LOGGER.info(
            f"Downloading data from s3: from s3://{bucket}/{key} to {local_file_name}"
        )
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(local_file_directory, exist_ok=True)
        s3_client.download_file(Bucket=bucket,
                                Key=key,
                                Filename=local_file_name)
        return local_file_name