Esempio n. 1
0
def test_feature_store_create(sagemaker_session_mock, role_arn,
                              feature_group_dummy_definitions, s3_uri):
    """Verify that ``FeatureGroup.create`` forwards its arguments to the session.

    The feature group is created with both an offline store (``s3_uri``) and
    the online store enabled; the mocked session's ``create_feature_group``
    must then have been called with the full, exact keyword-argument set.
    """
    group = FeatureGroup(
        name="MyFeatureGroup",
        sagemaker_session=sagemaker_session_mock,
    )
    group.feature_definitions = feature_group_dummy_definitions

    create_kwargs = {
        "s3_uri": s3_uri,
        "record_identifier_name": "feature1",
        "event_time_feature_name": "feature2",
        "role_arn": role_arn,
        "enable_online_store": True,
    }
    group.create(**create_kwargs)

    # Build the expected payload piece by piece so each part is easy to read.
    expected_definitions = [
        definition.to_dict() for definition in feature_group_dummy_definitions
    ]
    expected_offline_store = {
        "DisableGlueTableCreation": False,
        "S3StorageConfig": {"S3Uri": s3_uri},
    }
    sagemaker_session_mock.create_feature_group.assert_called_with(
        feature_group_name="MyFeatureGroup",
        record_identifier_name="feature1",
        event_time_feature_name="feature2",
        feature_definitions=expected_definitions,
        role_arn=role_arn,
        description=None,
        tags=None,
        online_store_config={"EnableOnlineStore": True},
        offline_store_config=expected_offline_store,
    )
def test_as_hive_ddl_with_default_values(
    create_table_ddl, feature_group_dummy_definitions, sagemaker_session_mock
):
    """Verify ``as_hive_ddl()`` output when called without any arguments.

    The mocked session supplies the offline-store description, the account id
    and the region; the result is compared against the ``create_table_ddl``
    template filled in with the values used below.
    """
    storage_config = {
        "S3Uri": "s3://some-bucket",
        "ResolvedOutputS3Uri": "s3://resolved_output_s3_uri",
    }
    sagemaker_session_mock.describe_feature_group.return_value = {
        "OfflineStoreConfig": {"S3StorageConfig": storage_config}
    }
    sagemaker_session_mock.account_id.return_value = "1234"
    sagemaker_session_mock.boto_session.region_name = "us-west-2"

    group = FeatureGroup(name="MyGroup", sagemaker_session=sagemaker_session_mock)
    group.feature_definitions = feature_group_dummy_definitions

    # Render the expected DDL first, then ask the feature group for its own.
    expected_ddl = create_table_ddl.format(
        database="sagemaker_featurestore",
        table_name="MyGroup",
        account="1234",
        region="us-west-2",
        feature_group_name="MyGroup",
    )
    actual_ddl = group.as_hive_ddl()
    assert expected_ddl == actual_ddl
def create_feature_group(
    feature_group_name,
    feature_group_description,
    df,
    id_name,
    event_time_name,
    offline_feature_group_bucket,
    sagemaker_session,
    role,
):
    """
    Create a new FeatureGroup, or fall back to an already existing one.

    The feature definitions are obtained from ``df`` via
    ``get_feature_definitions``. The group is created with the online store
    enabled and the function waits for creation to complete. If the attempt
    fails with the error code ``ResourceInUse``, the failure is treated as
    "the group already exists" and reported through the returned flag.

    :param feature_group_name: str
    :param feature_group_description: str
    :param df: pandas.DataFrame
    :param id_name: str, name of the record identifier feature
    :param event_time_name: str, name of the event time feature
    :param offline_feature_group_bucket: str, passed as ``s3_uri`` to ``create``
    :param sagemaker_session: sagemaker.Session()
    :param role: str, passed as ``role_arn`` to ``create``
    :return: tuple(FeatureGroup, bool) -- the feature group and whether it
        already existed
    :raises Exception: any error other than ``ResourceInUse`` is re-raised
        unchanged
    """
    feature_group = FeatureGroup(name=feature_group_name,
                                 sagemaker_session=sagemaker_session)
    feature_group.feature_definitions = get_feature_definitions(df, feature_group)
    feature_group_already_exists = False
    try:
        print(f"Trying to create feature group {feature_group_description} \n")
        feature_group.create(
            description=feature_group_description,
            record_identifier_name=id_name,
            event_time_feature_name=event_time_name,
            role_arn=role,
            s3_uri=offline_feature_group_bucket,
            enable_online_store=True,
        )
        wait_for_feature_group_creation_complete(feature_group)
    except Exception as e:
        # Not every exception carries a ``response`` mapping (botocore client
        # errors do). Read the error code defensively so that an unrelated
        # failure is re-raised as itself instead of being masked by an
        # AttributeError raised from inside this handler.
        response = getattr(e, "response", None)
        error = response.get("Error") if isinstance(response, dict) else None
        code = error.get("Code") if isinstance(error, dict) else None
        if code != "ResourceInUse":
            # Bare ``raise`` keeps the original exception and traceback.
            raise
        print(f"Using existing feature group: {feature_group_name}")
        feature_group_already_exists = True
    return feature_group, feature_group_already_exists
Esempio n. 4
0
def test_feature_store_create(
    sagemaker_session_mock, role_arn, feature_group_dummy_definitions, s3_uri
):
    """Verify that ``FeatureGroup.create`` forwards its arguments to the session.

    Checks that the mocked session's ``create_feature_group`` was called once
    and that every keyword argument listed below was passed with the expected
    value. Keyword arguments not listed here are deliberately not checked.
    """
    feature_group = FeatureGroup(name="MyFeatureGroup", sagemaker_session=sagemaker_session_mock)
    feature_group.feature_definitions = feature_group_dummy_definitions
    feature_group.create(
        s3_uri=s3_uri,
        record_identifier_name="feature1",
        event_time_feature_name="feature2",
        role_arn=role_arn,
        enable_online_store=True,
    )

    # NOTE: ``mock.called_with(...)`` is not an assertion. On a plain Mock it
    # just creates a truthy child mock (so ``assert`` on it always passes),
    # and newer Python versions raise AttributeError for it. Inspect the
    # recorded call instead.
    create_mock = sagemaker_session_mock.create_feature_group
    create_mock.assert_called_once()
    _, actual_kwargs = create_mock.call_args

    expected_kwargs = {
        "feature_group_name": "MyFeatureGroup",
        "record_identifier_name": "feature1",
        "event_time_feature_name": "feature2",
        "role_arn": role_arn,
        "online_store_config": {"EnableOnlineStore": True},
        "feature_definitions": [fd.to_dict() for fd in feature_group_dummy_definitions],
    }
    for key, expected_value in expected_kwargs.items():
        assert key in actual_kwargs, f"missing keyword argument: {key}"
        assert actual_kwargs[key] == expected_value, f"unexpected value for {key}"