def test_feature_definition():
    """Check that ``FeatureDefinition.to_dict`` yields the expected name/type dict."""
    expected_payload = {
        "FeatureName": "MyFeature",
        "FeatureType": "Integral",
    }
    integral_feature = FeatureDefinition(
        feature_name="MyFeature",
        feature_type=FeatureTypeEnum.INTEGRAL,
    )
    serialized = integral_feature.to_dict()
    # Both sides go through ``ordered`` before being compared.
    assert ordered(serialized) == ordered(expected_payload)
def load_feature_definitions(
    self,
    data_frame: DataFrame,
) -> Sequence[FeatureDefinition]:
    """Build feature definitions from the columns of a Pandas DataFrame.

    Every column produces one ``FeatureDefinition``: the column name becomes
    the feature name, and the feature type is looked up by the column's dtype
    name in ``self._DTYPE_TO_FEATURE_DEFINITION_CLS_MAP``.

    Dtype int_, int8, int16, int32, int64, uint8, uint16, uint32 and uint64
    are mapped to Integral feature type. Dtype float_, float16, float32 and
    float64 are mapped to Fractional feature type. string dtype is mapped to
    String feature type.

    ``self.feature_definitions`` is only assigned once every column has been
    mapped, so nothing is loaded when the frame contains an unsupported dtype.

    Args:
        data_frame (DataFrame): frame whose columns describe the features.

    Returns:
        list of FeatureDefinition

    Raises:
        ValueError: if a column's dtype has no (truthy) entry in the mapping.
    """
    inferred = []
    for column_name in data_frame:
        column_dtype = data_frame[column_name].dtype
        mapped_type = self._DTYPE_TO_FEATURE_DEFINITION_CLS_MAP.get(str(column_dtype))
        if not mapped_type:
            # Abort before touching self.feature_definitions.
            raise ValueError(
                f"Failed to infer Feature type based on dtype {column_dtype} "
                f"for column {column_name}."
            )
        inferred.append(
            FeatureDefinition(feature_name=column_name, feature_type=mapped_type)
        )

    self.feature_definitions = inferred
    return self.feature_definitions
def get_feature_definitions(df, feature_group):
    """
    Get datatypes from pandas DataFrame and map them to Feature Store datatypes.

    One ``FeatureDefinition`` is produced per column, in column order. The
    feature type is looked up by the column's dtype name in
    ``feature_group._DTYPE_TO_FEATURE_DEFINITION_CLS_MAP``.

    :param df: pandas.DataFrame
    :param feature_group: FeatureGroup providing the dtype-to-feature-type map
    :return: list of FeatureDefinition
    :raises ValueError: if a column's dtype has no entry in the map
    """
    # Dtype int_, int8, int16, int32, int64, uint8, uint16, uint32
    # and uint64 are mapped to Integral feature type.
    # Dtype float_, float16, float32 and float64
    # are mapped to Fractional feature type.
    # string dtype is mapped to String feature type.
    dtype_to_feature_type = feature_group._DTYPE_TO_FEATURE_DEFINITION_CLS_MAP

    feature_definitions = []
    for column in df.columns:
        dtype = df[column].dtype
        feature_type = dtype_to_feature_type.get(str(dtype))
        if feature_type is None:
            # Fail fast instead of emitting a definition whose type is None.
            raise ValueError(
                f"Failed to infer Feature type based on dtype {dtype} "
                f"for column {column}."
            )
        feature_definitions.append(FeatureDefinition(column, feature_type))
    # you can alternatively define your own schema
    return feature_definitions
def create_or_load_feature_group(prefix, feature_group_name):
    """Build a FeatureGroup for the fixed record schema and try to create it.

    The function first waits on the group via
    ``wait_for_feature_group_creation_complete`` and then attempts
    ``create``. Exceptions from either step are printed and not re-raised,
    so the FeatureGroup object is returned in every case.

    Uses ``sagemaker_session``, ``role`` and ``bucket`` from the enclosing
    scope.

    :param prefix: key prefix appended to ``s3://{bucket}/`` for ``s3_uri``
    :param feature_group_name: name passed to ``FeatureGroup``
    :return: the FeatureGroup object
    """
    # (feature name, feature type) pairs for our records, in schema order.
    # review_body is not part of this schema.
    schema = (
        ("input_ids", FeatureTypeEnum.STRING),
        ("input_mask", FeatureTypeEnum.STRING),
        ("segment_ids", FeatureTypeEnum.STRING),
        ("label_id", FeatureTypeEnum.INTEGRAL),
        ("review_id", FeatureTypeEnum.STRING),
        ("date", FeatureTypeEnum.STRING),
        ("label", FeatureTypeEnum.INTEGRAL),
        ("split_type", FeatureTypeEnum.STRING),
    )
    feature_definitions = [
        FeatureDefinition(feature_name=name, feature_type=kind)
        for name, kind in schema
    ]

    group = FeatureGroup(
        name=feature_group_name,
        feature_definitions=feature_definitions,
        sagemaker_session=sagemaker_session,
    )
    print("Feature Group: {}".format(group))

    try:
        print(
            "Waiting for existing Feature Group to become available if it is being created by another instance in our cluster..."
        )
        wait_for_feature_group_creation_complete(group)
    except Exception as wait_error:
        print("Before CREATE FG wait exeption: {}".format(wait_error))

    try:
        print("Creating Feature Group with role {}...".format(role))
        group.create(
            s3_uri=f"s3://{bucket}/{prefix}",
            record_identifier_name="review_id",
            event_time_feature_name="date",
            role_arn=role,
            enable_online_store=True,
        )
        print("Creating Feature Group. Completed.")

        print("Waiting for new Feature Group to become available...")
        wait_for_feature_group_creation_complete(group)
        print("Feature Group available.")
        group.describe()
    except Exception as create_error:
        # Best effort: report the failure and still hand back the group.
        print("Exception: {}".format(create_error))

    return group
def create_or_load_feature_group(prefix, feature_group_name):
    """Build a FeatureGroup for the fixed record schema and try to create it.

    Steps, in order:

    1. Construct the ``FeatureGroup`` from the feature definitions below.
    2. Call ``wait_for_feature_group_creation_complete`` on it; any exception
       is printed and ignored.
    3. Call ``create`` with ``review_id`` as record identifier and ``date`` as
       event time, wait again, then call ``describe``; any exception is
       printed and ignored.

    Uses ``sagemaker_session``, ``role`` and ``bucket`` from the enclosing
    scope.

    :param prefix: key prefix appended to ``s3://{bucket}/`` for ``s3_uri``
    :param feature_group_name: name passed to ``FeatureGroup``
    :return: the FeatureGroup object (returned even if a step above failed)
    """
    # Feature Definitions for our records (review_body is not included).
    feature_definitions = [
        FeatureDefinition(feature_name='input_ids', feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='input_mask', feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='segment_ids', feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='label_id', feature_type=FeatureTypeEnum.INTEGRAL),
        FeatureDefinition(feature_name='review_id', feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='date', feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='label', feature_type=FeatureTypeEnum.INTEGRAL),
        FeatureDefinition(feature_name='split_type', feature_type=FeatureTypeEnum.STRING),
    ]

    feature_group = FeatureGroup(
        name=feature_group_name,
        feature_definitions=feature_definitions,
        sagemaker_session=sagemaker_session,
    )

    print('Feature Group: {}'.format(feature_group))

    try:
        print(
            'Waiting for existing Feature Group to become available if it is being created by another instance in our cluster...'
        )
        wait_for_feature_group_creation_complete(feature_group)
    except Exception as e:
        print('Before CREATE FG wait exeption: {}'.format(e))

    try:
        record_identifier_feature_name = "review_id"
        event_time_feature_name = "date"

        print('Creating Feature Group with role {}...'.format(role))
        feature_group.create(
            s3_uri=f"s3://{bucket}/{prefix}",
            record_identifier_name=record_identifier_feature_name,
            event_time_feature_name=event_time_feature_name,
            role_arn=role,
            enable_online_store=True,
        )
        # Single-line literal: the message must not be split across lines.
        print('Creating Feature Group. Completed.')

        print('Waiting for new Feature Group to become available...')
        wait_for_feature_group_creation_complete(feature_group)
        print('Feature Group available.')
        feature_group.describe()
    except Exception as e:
        # Best effort: report the failure and still hand back the group.
        print('Exception: {}'.format(e))

    return feature_group
def create_or_load_feature_group(prefix, feature_group_name):
    """Build an all-string-schema FeatureGroup and try to create it.

    The function waits on the group via
    ``wait_for_feature_group_creation_complete``, then attempts ``create``
    with ``enable_online_store=False``. Exceptions from either step are
    printed and not re-raised, so the FeatureGroup object is always returned.

    Uses ``sagemaker_session``, ``role`` and ``bucket`` from the enclosing
    scope.

    :param prefix: key prefix appended to ``s3://{bucket}/`` for ``s3_uri``
    :param feature_group_name: name passed to ``FeatureGroup``
    :return: the FeatureGroup object
    """
    # Every feature in these records is declared with the STRING type.
    string_feature_names = (
        'review_id',
        'date',
        'sentiment',
        'label_id',
        'input_ids',
        'review_body',
        'split_type',
    )
    feature_definitions = [
        FeatureDefinition(feature_name=name, feature_type=FeatureTypeEnum.STRING)
        for name in string_feature_names
    ]

    # Set up the Feature Group object.
    group = FeatureGroup(
        name=feature_group_name,
        feature_definitions=feature_definitions,
        sagemaker_session=sagemaker_session,
    )
    print('Feature Group: {}'.format(group))

    try:
        print(
            'Waiting for existing Feature Group to become available if it is being created by another instance in our cluster...'
        )
        wait_for_feature_group_creation_complete(group)
    except Exception as wait_error:
        print('Before CREATE FG wait exeption: {}'.format(wait_error))

    try:
        print('Creating Feature Group with role {}...'.format(role))
        # Issue the create call.
        group.create(
            s3_uri=f"s3://{bucket}/{prefix}",
            record_identifier_name="review_id",
            event_time_feature_name="date",
            role_arn=role,
            enable_online_store=False,
        )
        print('Creating Feature Group. Completed.')

        print('Waiting for new Feature Group to become available...')
        wait_for_feature_group_creation_complete(group)
        print('Feature Group available.')

        # Fetch the information about the Feature Group (result is unused).
        group.describe()
    except Exception as create_error:
        # Best effort: report the failure and still hand back the group.
        print('Exception: {}'.format(create_error))

    return group