Example #1
0
    def test_get_session_returns_a_boto3_session(self):
        """Exercise the session returned by ``AwsBaseHook.get_session``.

        The session is used to obtain a DynamoDB resource, create the
        ``test_airflow`` table keyed on a string ``id`` hash attribute, wait
        until the table exists, and check that the new table reports zero
        items.
        """
        table_name = 'test_airflow'

        aws_hook = AwsBaseHook(aws_conn_id='aws_default', resource_type='dynamodb')
        dynamodb = aws_hook.get_session().resource('dynamodb')

        # Table layout: a single string attribute ``id`` used as the hash key.
        key_schema = [{'AttributeName': 'id', 'KeyType': 'HASH'}]
        attribute_definitions = [{'AttributeName': 'id', 'AttributeType': 'S'}]
        throughput = {'ReadCapacityUnits': 10, 'WriteCapacityUnits': 10}

        table = dynamodb.create_table(  # pylint: disable=no-member
            TableName=table_name,
            KeySchema=key_schema,
            AttributeDefinitions=attribute_definitions,
            ProvisionedThroughput=throughput,
        )

        # Block until the service reports the table as existing.
        exists_waiter = table.meta.client.get_waiter('table_exists')
        exists_waiter.wait(TableName=table_name)

        assert table.item_count == 0
Example #2
0
def get_sagemaker_role_arn(role_name, region_name):
    """Return the ARN of the IAM role called ``role_name``.

    An IAM client is created for ``region_name`` and ``get_role`` is called
    with the given role name; the ``Arn`` field of the returned ``Role``
    entry is handed back. Errors raised by the IAM call are not caught here.
    """
    iam_client = boto3.client('iam', region_name=region_name)
    role_details = iam_client.get_role(RoleName=role_name)["Role"]
    return role_details["Arn"]


# =============================================================================
# setting up training, tuning and transform configuration
# =============================================================================

# Module-level setup: read the configuration object exposed by `cfg`.
config = cfg.config

# Build an AWS hook for the 'sagemaker' client type (no aws_conn_id is passed
# here) and open a session for the region named in the job-level config.
hook = AwsBaseHook(client_type='sagemaker')
region = config["job_level"]["region_name"]
sess = hook.get_session(region_name=region)

# The role is taken straight from the config; get_sagemaker_role_arn() above
# is not used to resolve it here.
# NOTE(review): presumably this value must already be usable as a SageMaker
# execution role (e.g. an ARN) — confirm against the config file.
role = config["train_model"]["sagemaker_role"]

# Look up the 'factorization-machines' image for this region, passing 'latest'
# as the third positional argument.
# NOTE(review): presumably that argument is the image version — confirm
# against the installed SageMaker SDK.
container = sagemaker.image_uris.retrieve('factorization-machines', region,
                                          'latest')
# NOTE(review): is_hpo_enabled() is defined outside this excerpt; presumably
# it reports whether hyperparameter tuning should be configured — confirm.
hpo_enabled = is_hpo_enabled()

# create estimator
fm_estimator = Estimator(
    image_uri=container,
    role=role,
    sagemaker_session=sagemaker.session.Session(sess),
    train_instance_count=1,
    train_instance_type="ml.c5.4xlarge",
    train_volume_size=30,
Example #3
0
# NOTE: a SageMaker team role that also has Glue access must be created first.
# TODO: add this instruction to the blog.
def get_sagemaker_role_arn(role_name, region_name):
    """Look up the IAM role ``role_name`` and return its ARN.

    Creates an IAM client for ``region_name``, calls ``get_role`` for the
    given name and returns the ``Arn`` found under the response's ``Role``
    key. Errors raised by the IAM call are not caught here.
    """
    iam_client = boto3.client("iam", region_name=region_name)
    return iam_client.get_role(RoleName=role_name)["Role"]["Arn"]


# =============================================================================
# setting up training, model creation and endpoint deployment configuration
# =============================================================================

# Set configuration for tasks.
# Build an AWS hook bound to the "airflow-sagemaker" connection id for the
# "sagemaker" client type, then open a session for the configured region.
hook = AwsBaseHook(aws_conn_id="airflow-sagemaker", client_type="sagemaker")
sess = hook.get_session(
    region_name=config.REGION_NAME
)  # TODO(review): how does this session differ from the SageMaker session, and is it necessary?
# Resolve the role ARN from the configured role name via the IAM lookup helper
# defined above.
sagemaker_role = get_sagemaker_role_arn(config.SAGEMAKER_ROLE_NAME,
                                        config.REGION_NAME)
# NOTE(review): get_image_uri is imported outside this excerpt; presumably it
# returns the "xgboost" training image URI for the session's region — confirm.
container = get_image_uri(sess.region_name, "xgboost")

# Training hyperparameters. Every value is deliberately kept as a string, and
# the key order below is preserved as written.
# NOTE(review): presumably these are XGBoost parameters, given the "xgboost"
# container resolved above — confirm.
hyperparameters = {
    "max_depth": "5",
    "eta": "0.2",
    "gamma": "4",
    "min_child_weight": "6",
    "subsample": "0.8",
    "objective": "binary:logistic",
    "num_round": "100",
}