コード例 #1
0
def set_up_efs_fsx(sagemaker_session, ec2_instance_type):
    """Provision the EC2 instance and the EFS/FSx file systems used together.

    In order, this ensures the key pair and IAM profile/role exist, obtains
    the VPC subnets and security groups, launches an EC2 instance in the first
    subnet, creates the EFS and FSx file systems, connects to the instance and
    finally calls ``_upload_data_and_mount_fs`` for both file systems.

    Args:
        sagemaker_session: Session object handed to every helper; its
            ``boto_region_name`` is forwarded to ``_upload_data_and_mount_fs``.
        ec2_instance_type: Instance type forwarded to ``_create_ec2_instance``.

    Returns:
        The ``fs_resources`` object defined outside this function, after the
        first subnet id and the security group ids have been stored in it.

    Raises:
        Exception: Any ``Exception`` raised by a step is re-raised unchanged,
            after ``tear_down`` has been called with ``fs_resources``.
    """
    try:
        _check_or_create_key_pair(sagemaker_session)
        _check_or_create_iam_profile_and_attach_role(sagemaker_session)

        subnets, sg_ids = check_or_create_vpc_resources_efs_fsx(sagemaker_session, VPC_NAME)
        primary_subnet = subnets[0]
        # Record the network details right away so that tear_down receives
        # them even if a later step fails.
        fs_resources["subnet_id"] = primary_subnet
        fs_resources["security_group_ids"] = sg_ids

        image_id = _ami_id_for_region(sagemaker_session)
        instance = _create_ec2_instance(
            sagemaker_session,
            image_id,
            ec2_instance_type,
            KEY_NAME,
            MIN_COUNT,
            MAX_COUNT,
            sg_ids,
            primary_subnet,
        )

        # _create_efs returns a pair; the second element (mount target id) is
        # not needed here.
        efs_id, _mount_target_id = _create_efs(sagemaker_session)
        fsx_id = _create_fsx(sagemaker_session)

        ssh_connection = _connect_ec2_instance(instance)
        _upload_data_and_mount_fs(
            ssh_connection, efs_id, fsx_id, sagemaker_session.boto_region_name
        )
    except Exception:
        # Clean up whatever has been recorded so far, then surface the
        # original error to the caller.
        tear_down(sagemaker_session, fs_resources)
        raise
    else:
        return fs_resources
コード例 #2
0
def set_up_efs_fsx(sagemaker_session):
    """Provision EC2, EFS and FSx resources and return them as ``FsResources``.

    The key pair, IAM profile/role and VPC resources are checked or created
    first. An EC2 instance is then launched in the first subnet, the EFS file
    system and its mount target are set up, and the FSx file system is checked
    or created. Last, the instance is connected to and
    ``_upload_data_and_mount_fs`` is called for both file systems.

    Args:
        sagemaker_session: Session object handed to every helper; its
            ``boto_region_name`` is forwarded to ``_upload_data_and_mount_fs``.

    Returns:
        The ``FsResources`` instance describing everything that was set up.

    Raises:
        Exception: Errors from the provisioning helpers propagate as-is and
            ``tear_down`` is NOT called for them. Only a failure while
            connecting to the instance or uploading/mounting triggers
            ``tear_down`` (with the assembled ``FsResources``) before the
            error is re-raised.
    """
    _check_or_create_key_pair(sagemaker_session)
    _check_or_create_iam_profile_and_attach_role(sagemaker_session)
    subnets, sg_ids = check_or_create_vpc_resources_efs_fsx(sagemaker_session, VPC_NAME)

    image_id = _ami_id_for_region(sagemaker_session)
    instance = _create_ec2_instance(
        sagemaker_session,
        image_id,
        EC2_INSTANCE_TYPE,
        KEY_NAME,
        MIN_COUNT,
        MAX_COUNT,
        sg_ids,
        subnets[0],
    )

    efs_id = _check_or_create_efs(sagemaker_session)
    efs_mount_target_id = _create_efs_mount(sagemaker_session, efs_id)

    fsx_id = _check_or_create_fsx(sagemaker_session)

    # Bundle every identifier needed later (including by tear_down).
    resources = FsResources(
        KEY_NAME,
        KEY_PATH,
        ROLE_NAME,
        subnets[0],
        sg_ids,
        efs_id,
        fsx_id,
        instance.id,
        efs_mount_target_id,
    )

    aws_region = sagemaker_session.boto_region_name
    try:
        ssh_connection = _connect_ec2_instance(instance)
        _upload_data_and_mount_fs(ssh_connection, efs_id, fsx_id, aws_region)
    except Exception:
        # The resources already exist at this point, so release them before
        # letting the original error reach the caller.
        tear_down(sagemaker_session, resources)
        raise

    return resources
コード例 #3
0
def _create_efs_mount(sagemaker_session, file_system_id):
    """Create an EFS mount target and poll until it reports ``available``.

    The mount target is created in the first subnet returned by
    ``check_or_create_vpc_resources_efs_fsx`` and is attached to all of the
    returned security groups.

    Args:
        sagemaker_session: Session object; its ``boto_session`` supplies the
            EFS client.
        file_system_id: Id of the EFS file system to create the target for.

    Returns:
        The ``MountTargetId`` from the ``create_mount_target`` response.

    Note:
        Polling is driven by ``retries(50, ...)``, which is defined elsewhere.
        What happens when the budget runs out depends on that helper
        (presumably it raises -- confirm); if it simply stops iterating, the
        id is returned even though the target never became available.
    """
    subnets, sg_ids = check_or_create_vpc_resources_efs_fsx(sagemaker_session, VPC_NAME)
    client = sagemaker_session.boto_session.client("efs")

    creation = client.create_mount_target(
        FileSystemId=file_system_id,
        SubnetId=subnets[0],
        SecurityGroups=sg_ids,
    )
    target_id = creation["MountTargetId"]

    for _ in retries(50, "Checking EFS mounting target status"):
        described = client.describe_mount_targets(MountTargetId=target_id)
        lifecycle_state = described["MountTargets"][0]["LifeCycleState"]
        if lifecycle_state != "available":
            continue
        break

    return target_id
コード例 #4
0
def _create_fsx(sagemaker_session):
    """Create an FSx for Lustre file system and poll until it is ``AVAILABLE``.

    The file system is placed in the first subnet returned by
    ``check_or_create_vpc_resources_efs_fsx`` with all returned security
    groups. Its id is stored under ``"file_system_fsx_id"`` in the
    ``fs_resources`` object defined outside this function before polling
    starts.

    Args:
        sagemaker_session: Session object; its ``boto_session`` supplies the
            FSx client.

    Returns:
        The ``FileSystemId`` from the ``create_file_system`` response.

    Note:
        Polling is driven by ``retries(50, ...)``, which is defined elsewhere.
        What happens when the budget runs out depends on that helper
        (presumably it raises -- confirm).
    """
    client = sagemaker_session.boto_session.client("fsx")
    subnets, sg_ids = check_or_create_vpc_resources_efs_fsx(sagemaker_session, VPC_NAME)

    # NOTE(review): the FSx API documents StorageCapacity in GiB, while the
    # constant's name says bytes -- confirm the constant's value and name.
    creation = client.create_file_system(
        FileSystemType="LUSTRE",
        StorageCapacity=STORAGE_CAPACITY_IN_BYTES,
        SubnetIds=[subnets[0]],
        SecurityGroupIds=sg_ids,
    )
    new_fs_id = creation["FileSystem"]["FileSystemId"]
    # Stored before polling, so the id is already recorded if the wait below
    # fails (presumably so cleanup can find it -- confirm against tear_down).
    fs_resources["file_system_fsx_id"] = new_fs_id

    for _ in retries(50, "Checking FSX creating status"):
        described = client.describe_file_systems(FileSystemIds=[new_fs_id])
        lifecycle = described["FileSystems"][0]["Lifecycle"]
        if lifecycle != "AVAILABLE":
            continue
        break

    return new_fs_id