def set_up_efs_fsx(sagemaker_session, ec2_instance_type):
    """Create the EC2/EFS/FSx fixtures and return the shared ``fs_resources``.

    The steps run in a fixed order: key pair, IAM profile/role, VPC
    resources, EC2 instance, EFS, FSx, then connecting to the instance and
    uploading data / mounting the file systems.

    Args:
        sagemaker_session: Session handed to every helper; its
            ``boto_region_name`` is forwarded to the upload/mount step.
        ec2_instance_type: Instance type passed to ``_create_ec2_instance``.

    Returns:
        The module-level ``fs_resources`` object, updated with the first
        subnet id and the security group ids.

    Raises:
        Exception: Any failure is re-raised unchanged after
            ``tear_down(sagemaker_session, fs_resources)`` has been called.
    """
    try:
        _check_or_create_key_pair(sagemaker_session)
        _check_or_create_iam_profile_and_attach_role(sagemaker_session)

        subnets, sg_ids = check_or_create_vpc_resources_efs_fsx(
            sagemaker_session, VPC_NAME
        )
        primary_subnet = subnets[0]
        # Record networking details first so tear_down receives them if a
        # later step fails.
        fs_resources["subnet_id"] = primary_subnet
        fs_resources["security_group_ids"] = sg_ids

        image_id = _ami_id_for_region(sagemaker_session)
        instance = _create_ec2_instance(
            sagemaker_session,
            image_id,
            ec2_instance_type,
            KEY_NAME,
            MIN_COUNT,
            MAX_COUNT,
            sg_ids,
            primary_subnet,
        )

        # _create_efs returns a pair; only the file system id is used here.
        efs_id, _mount_target_id = _create_efs(sagemaker_session)
        fsx_id = _create_fsx(sagemaker_session)

        connection = _connect_ec2_instance(instance)
        _upload_data_and_mount_fs(
            connection, efs_id, fsx_id, sagemaker_session.boto_region_name
        )
    except Exception:
        tear_down(sagemaker_session, fs_resources)
        raise

    return fs_resources
def set_up_efs_fsx(sagemaker_session):
    """Create the EC2/EFS/FSx fixtures and return them as an ``FsResources``.

    The steps run in a fixed order: key pair, IAM profile/role, VPC
    resources, EC2 instance, EFS file system, EFS mount target, FSx file
    system. The instance is then connected to and used to upload data and
    mount both file systems.

    Args:
        sagemaker_session: Session handed to every helper; its
            ``boto_region_name`` is forwarded to the upload/mount step.

    Returns:
        An ``FsResources`` built from the key/role constants, the first
        subnet id, the security group ids, both file system ids, the EC2
        instance id and the EFS mount target id.

    Raises:
        Exception: A failure while connecting or uploading/mounting is
            re-raised unchanged after ``tear_down`` has been called.
    """
    _check_or_create_key_pair(sagemaker_session)
    _check_or_create_iam_profile_and_attach_role(sagemaker_session)

    subnets, sg_ids = check_or_create_vpc_resources_efs_fsx(
        sagemaker_session, VPC_NAME
    )
    image_id = _ami_id_for_region(sagemaker_session)
    primary_subnet = subnets[0]

    # NOTE(review): everything up to the try block below runs unguarded, so
    # a failure here does not reach tear_down - confirm this is intended.
    instance = _create_ec2_instance(
        sagemaker_session,
        image_id,
        EC2_INSTANCE_TYPE,
        KEY_NAME,
        MIN_COUNT,
        MAX_COUNT,
        sg_ids,
        primary_subnet,
    )
    efs_id = _check_or_create_efs(sagemaker_session)
    mount_target_id = _create_efs_mount(sagemaker_session, efs_id)
    fsx_id = _check_or_create_fsx(sagemaker_session)

    resources = FsResources(
        KEY_NAME,
        KEY_PATH,
        ROLE_NAME,
        primary_subnet,
        sg_ids,
        efs_id,
        fsx_id,
        instance.id,
        mount_target_id,
    )
    aws_region = sagemaker_session.boto_region_name

    try:
        connection = _connect_ec2_instance(instance)
        _upload_data_and_mount_fs(connection, efs_id, fsx_id, aws_region)
    except Exception:
        # Release what was provisioned, then surface the original error.
        tear_down(sagemaker_session, resources)
        raise

    return resources
def _create_efs_mount(sagemaker_session, file_system_id):
    """Create an EFS mount target and poll until it reports ``available``.

    The mount target is placed in the first subnet returned by
    ``check_or_create_vpc_resources_efs_fsx`` and uses the security groups
    returned by the same call.

    Args:
        sagemaker_session: Session whose ``boto_session`` supplies the EFS
            client.
        file_system_id: Id of the EFS file system to attach the target to.

    Returns:
        The ``MountTargetId`` from the ``create_mount_target`` response.
    """
    subnets, sg_ids = check_or_create_vpc_resources_efs_fsx(
        sagemaker_session, VPC_NAME
    )
    client = sagemaker_session.boto_session.client("efs")

    created = client.create_mount_target(
        FileSystemId=file_system_id,
        SubnetId=subnets[0],
        SecurityGroups=sg_ids,
    )
    target_id = created["MountTargetId"]

    # Poll the lifecycle state; stop as soon as the target is usable.
    # NOTE(review): what retries() does when attempts run out is not visible
    # here - presumably it raises; confirm.
    for _ in retries(50, "Checking EFS mounting target status"):
        targets = client.describe_mount_targets(MountTargetId=target_id)[
            "MountTargets"
        ]
        if targets[0]["LifeCycleState"] == "available":
            break

    return target_id
def _create_fsx(sagemaker_session):
    """Create an FSx for Lustre file system and poll until it is ``AVAILABLE``.

    The file system is placed in the first subnet returned by
    ``check_or_create_vpc_resources_efs_fsx`` and uses the security groups
    returned by the same call. Its id is written to
    ``fs_resources["file_system_fsx_id"]`` before polling starts.

    Args:
        sagemaker_session: Session whose ``boto_session`` supplies the FSx
            client.

    Returns:
        The ``FileSystemId`` of the newly created file system.
    """
    client = sagemaker_session.boto_session.client("fsx")
    subnets, sg_ids = check_or_create_vpc_resources_efs_fsx(
        sagemaker_session, VPC_NAME
    )

    created = client.create_file_system(
        FileSystemType="LUSTRE",
        StorageCapacity=STORAGE_CAPACITY_IN_BYTES,
        SubnetIds=[subnets[0]],
        SecurityGroupIds=sg_ids,
    )
    file_system_id = created["FileSystem"]["FileSystemId"]
    # Record the id before waiting, so it is already stored if polling fails.
    fs_resources["file_system_fsx_id"] = file_system_id

    # NOTE(review): what retries() does when attempts run out is not visible
    # here - presumably it raises; confirm.
    for _ in retries(50, "Checking FSX creating status"):
        described = client.describe_file_systems(FileSystemIds=[file_system_id])
        if described["FileSystems"][0]["Lifecycle"] == "AVAILABLE":
            break

    return file_system_id