예제 #1
0
 def _validate_preprocessed_dataset(user_features: FeatureDataset,
                                    item_features: FeatureDataset):
     """Report feature datasets that contain duplicated ids.

     The user features are checked first, then the item features. A dataset
     that is ``None`` is skipped. For each remaining dataset, the rows of its
     ``df`` are tested for duplicates on the column named by ``ids.name``; if
     any row is flagged, a ``DuplicateFeatureDefinitionError`` is handed to
     ``ErrorMapping.throw``.

     :param user_features: user feature dataset, or None.
     :param item_features: item feature dataset, or None.
     """
     for feature_dataset in (user_features, item_features):
         if feature_dataset is None:
             continue
         # presumably a pandas boolean Series marking repeated ids - confirm against FeatureDataset.df
         duplicate_flags = feature_dataset.df.duplicated(subset=feature_dataset.ids.name)
         if any(duplicate_flags):
             ErrorMapping.throw(DuplicateFeatureDefinitionError())
예제 #2
0
 def _validate_features_type(dataset: FeatureDataset):
     """Report every column of ``dataset`` whose type is ``ColumnTypeName.NAN``.

     Each column in ``dataset.columns`` is looked up with
     ``dataset.get_column_type``. When the type equals ``ColumnTypeName.NAN``,
     an ``InvalidColumnTypeError`` carrying the column type, the column name
     and the dataset name is passed to ``ErrorMapping.throw``.

     :param dataset: feature dataset whose column types are inspected.
     """
     for column_name in dataset.columns:
         column_type = dataset.get_column_type(column_name)
         if column_type == ColumnTypeName.NAN:
             invalid_type_error = InvalidColumnTypeError(
                 col_type=column_type,
                 col_name=column_name,
                 arg_name=dataset.name)
             ErrorMapping.throw(invalid_type_error)
    def _check_features(self, features: FeatureDataset):
        """Check compatibility between the recorded feature metas and the given features.

        The two feature datasets are compatible if:
        1. The new feature dataset contains every feature name recorded in ``self.feature_metas``.
        2. Each of those features has the same type in the new dataset as in its recorded meta.

        A missing feature is reported through ``ErrorMapping.throw`` with a ``ColumnNotFoundError``;
        a type mismatch is reported through ``ErrorMapping.verify_element_type``.

        :param features: the new feature dataset to check against the recorded feature metas.
        """
        common_logger.info("Check features compatibility with existing feature metas")
        # Only the meta objects are needed; the mapping keys are not used.
        for feature_meta in self.feature_metas.values():
            name = feature_meta.name
            if name not in features.features:
                ErrorMapping.throw(ColumnNotFoundError(column_id=name, arg_name_missing_column=features.name))
            # Look the type up once and reuse it for both the comparison and the error report.
            column_type = features.get_column_type(name)
            if column_type != feature_meta.type_:
                ErrorMapping.verify_element_type(type_=column_type, expected_type=feature_meta.type_, column_name=name,
                                                 arg_name=features.name)
예제 #4
0
    def _validate_preprocessed_dataset(transactions: TransactionDataset, user_features: FeatureDataset,
                                       item_features: FeatureDataset):
        """Validate the preprocessed transactions and the optional feature datasets.

        Checks, in order:
        1. ``transactions.row_size`` is positive, otherwise an ``InvalidDatasetError`` is thrown.
        2. No two transaction rows share the same values in the user and item columns, otherwise
           a ``MoreThanOneRatingError`` is thrown.
        3. Neither feature dataset (when not ``None``) has duplicated values in its id column,
           otherwise a ``DuplicateFeatureDefinitionError`` is thrown.

        All errors are reported through ``ErrorMapping.throw``.

        NOTE(review): there is no ``self``/``cls`` parameter - presumably this is declared as a
        static method outside this excerpt; confirm.

        :param transactions: transaction dataset; must not be None.
        :param user_features: user feature dataset, or None.
        :param item_features: item feature dataset, or None.
        """
        if transactions.row_size <= 0:
            ErrorMapping.throw(
                InvalidDatasetError(dataset1=transactions.name, reason="dataset does not have any valid samples"))

        # A (user, item) pair may appear only once in the transactions.
        user_item_columns = transactions.columns[[TRANSACTIONS_USER_COL, TRANSACTIONS_ITEM_COL]]
        if transactions.df.duplicated(subset=user_item_columns).any():
            ErrorMapping.throw(MoreThanOneRatingError())

        # The same duplicate-id rule applies to both optional feature datasets; users are checked first.
        for feature_dataset in (user_features, item_features):
            if feature_dataset is not None and feature_dataset.df.duplicated(subset=feature_dataset.ids.name).any():
                ErrorMapping.throw(DuplicateFeatureDefinitionError())