def test_get_session_returns_a_boto3_session(self):
    """Create a DynamoDB table via the hook's session and check it is empty."""
    table_name = 'test_airflow'
    key_schema = [{'AttributeName': 'id', 'KeyType': 'HASH'}]
    attribute_definitions = [{'AttributeName': 'id', 'AttributeType': 'S'}]
    throughput = {'ReadCapacityUnits': 10, 'WriteCapacityUnits': 10}

    # The session handed back by the hook is used to build the resource.
    aws_hook = AwsBaseHook(aws_conn_id='aws_default', resource_type='dynamodb')
    dynamodb = aws_hook.get_session().resource('dynamodb')

    created_table = dynamodb.create_table(  # pylint: disable=no-member
        TableName=table_name,
        KeySchema=key_schema,
        AttributeDefinitions=attribute_definitions,
        ProvisionedThroughput=throughput,
    )

    # Block until the table exists before inspecting it.
    table_exists_waiter = created_table.meta.client.get_waiter('table_exists')
    table_exists_waiter.wait(TableName=table_name)

    assert created_table.item_count == 0
def get_sagemaker_role_arn(role_name, region_name):
    """Return the ARN of the IAM role named ``role_name``.

    A boto3 IAM client is created for ``region_name``, ``get_role`` is called
    with ``role_name`` and the ``["Role"]["Arn"]`` entry of the response is
    returned.
    """
    iam = boto3.client('iam', region_name=region_name)
    response = iam.get_role(RoleName=role_name)
    return response["Role"]["Arn"]


# =============================================================================
# setting up training, tuning and transform configuration
# =============================================================================

# read config file
config = cfg.config

# In the visible code the hook is only used to obtain a session for the
# region read from the config.
hook = AwsBaseHook(client_type='sagemaker')
region = config["job_level"]["region_name"]
sess = hook.get_session(region_name=region)
# The role is read directly from the config here; get_sagemaker_role_arn is
# not called in the visible part of this script.
role = config["train_model"]["sagemaker_role"]
container = sagemaker.image_uris.retrieve('factorization-machines', region, 'latest')
hpo_enabled = is_hpo_enabled()

# create estimator
# NOTE(review): train_instance_count / train_instance_type / train_volume_size
# look like SageMaker SDK v1 parameter names, while image_uri and
# image_uris.retrieve are v2 names - confirm against the installed SDK version.
# NOTE(review): the Estimator(...) call is cut off in this chunk; its remaining
# arguments and closing parenthesis are not visible here.
fm_estimator = Estimator(
    image_uri=container,
    role=role,
    sagemaker_session=sagemaker.session.Session(sess),
    train_instance_count=1,
    train_instance_type="ml.c5.4xlarge",
    train_volume_size=30,
# NOTE: a SageMaker team role that also has Glue access must be created first.
# TODO: add this instruction to the blog.
def get_sagemaker_role_arn(role_name, region_name):
    """Look up the IAM role ``role_name`` and return its ARN.

    The lookup goes through a boto3 IAM client created for ``region_name``.
    """
    iam_client = boto3.client("iam", region_name=region_name)
    role_description = iam_client.get_role(RoleName=role_name)["Role"]
    return role_description["Arn"]


# =============================================================================
# setting up training, model creation and endpoint deployment configuration
# =============================================================================

# set configuration for tasks
hook = AwsBaseHook(aws_conn_id="airflow-sagemaker", client_type="sagemaker")

# TODO(review): how is this session different from the SageMaker session,
# and is it necessary?
sess = hook.get_session(region_name=config.REGION_NAME)

sagemaker_role = get_sagemaker_role_arn(
    config.SAGEMAKER_ROLE_NAME,
    config.REGION_NAME,
)
container = get_image_uri(sess.region_name, "xgboost")

# initialize training hyperparameters (all values are passed as strings)
hyperparameters = {
    "max_depth": "5",
    "eta": "0.2",
    "gamma": "4",
    "min_child_weight": "6",
    "subsample": "0.8",
    "objective": "binary:logistic",
    "num_round": "100",
}