def _validate_parameters(self, learner: WideNDeepModel, test_data: Dataset, user_features: FeatureDataset = None,
                         item_features: FeatureDataset = None, **kwargs):
    training_transactions = kwargs["training_transactions"]
    super()._validate_parameters(learner, test_data, user_features=user_features, item_features=item_features)
    ErrorMapping.verify_not_null_or_empty(x=training_transactions, name="Training data")
    ErrorMapping.verify_number_of_rows_greater_than_or_equal_to(
        curr_row_count=training_transactions.row_size, required_row_count=1,
        arg_name=training_transactions.name)
    # Training transactions must be user-item pairs (2 columns) or user-item-rating triples (3 columns).
    ErrorMapping.verify_number_of_columns_less_than_or_equal_to(
        curr_column_count=training_transactions.column_size, required_column_count=3,
        arg_name=training_transactions.name)
    ErrorMapping.verify_number_of_columns_greater_than_or_equal_to(
        curr_column_count=training_transactions.column_size, required_column_count=2,
        arg_name=training_transactions.name)
    ErrorMapping.verify_number_of_columns_greater_than_or_equal_to(
        curr_column_count=test_data.column_size, required_column_count=2, arg_name=test_data.name)
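# Hedged sketch of the shape contract enforced by _validate_parameters above:
# transaction data must be user-item pairs (2 columns) or user-item-rating
# triples (3 columns) with at least one row. _check_transaction_shape is an
# invented illustration helper, not part of this module's API.
def _check_transaction_shape(n_rows: int, n_cols: int) -> None:
    if n_rows < 1:
        raise ValueError("expected at least one transaction row")
    if not 2 <= n_cols <= 3:
        raise ValueError("expected 2 (user, item) or 3 (user, item, rating) columns")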
def _validate_feature_dataset(dataset: FeatureDataset):
    ErrorMapping.verify_number_of_columns_greater_than_or_equal_to(
        curr_column_count=dataset.column_size, required_column_count=2, arg_name=dataset.name)
    ErrorMapping.verify_number_of_rows_greater_than_or_equal_to(
        curr_row_count=dataset.row_size, required_row_count=1, arg_name=dataset.name)
    TrainWideAndDeepRecommenderModule._validate_features_type(dataset)
def _validate_preprocessed_dataset(user_features: FeatureDataset, item_features: FeatureDataset):
    if user_features is not None and any(user_features.df.duplicated(subset=user_features.ids.name)):
        ErrorMapping.throw(DuplicateFeatureDefinitionError())
    if item_features is not None and any(item_features.df.duplicated(subset=item_features.ids.name)):
        ErrorMapping.throw(DuplicateFeatureDefinitionError())
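# Hedged illustration of the duplicate check above: pandas' DataFrame.duplicated
# flags every repetition of a value after its first occurrence, so any(...) is
# True exactly when some id defines features more than once. The column name
# "user_id" is invented for this sketch.
def _demo_duplicate_feature_ids() -> bool:
    import pandas as pd
    df = pd.DataFrame({"user_id": ["u1", "u2", "u1"], "age": [20, 35, 21]})
    return any(df.duplicated(subset="user_id"))  # True: "u1" appears twice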
def _validate_features_type(dataset: FeatureDataset):
    for col in dataset.columns:
        if dataset.get_column_type(col) == ColumnTypeName.NAN:
            ErrorMapping.throw(
                InvalidColumnTypeError(col_type=dataset.get_column_type(col), col_name=col,
                                       arg_name=dataset.name))
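# Hedged note on the check above: ColumnTypeName.NAN is assumed to mark a column
# with no usable values. A rough pandas approximation of that condition, for
# intuition only (_demo_is_nan_column is an invented helper):
def _demo_is_nan_column(values) -> bool:
    import pandas as pd
    return pd.Series(values).isna().all()  # e.g. _demo_is_nan_column([None, float("nan")]) -> True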
def _validate_feature_dataset(self, dataset: FeatureDataset):
    ErrorMapping.verify_number_of_columns_greater_than_or_equal_to(
        curr_column_count=dataset.column_size, required_column_count=2, arg_name=dataset.name)
    ErrorMapping.verify_number_of_rows_greater_than_or_equal_to(
        curr_row_count=dataset.row_size, required_row_count=1, arg_name=dataset.name)
    self._validate_features_type(dataset)
def set_inputs_name(transactions: TransactionDataset, user_features: FeatureDataset = None,
                    item_features: FeatureDataset = None):
    _TRANSACTIONS_NAME = "Training dataset of user-item-rating triples"
    _USER_FEATURES_NAME = "User features"
    _ITEM_FEATURES_NAME = "Item features"
    if transactions is not None:
        transactions.name = _TRANSACTIONS_NAME
    else:
        # transactions is None here, so this raises a null-input error with the friendly name.
        ErrorMapping.verify_not_null_or_empty(x=transactions, name=_TRANSACTIONS_NAME)
    if user_features is not None:
        user_features.name = _USER_FEATURES_NAME
    if item_features is not None:
        item_features.name = _ITEM_FEATURES_NAME
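# Hedged usage sketch: set_inputs_name stamps friendly names onto the inputs so
# that later validation errors can refer to them. The SimpleNamespace objects
# below are illustrative stand-ins for real TransactionDataset / FeatureDataset
# instances.
def _demo_set_inputs_name():
    from types import SimpleNamespace
    transactions = SimpleNamespace(name=None)
    user_features = SimpleNamespace(name=None)
    set_inputs_name(transactions, user_features=user_features)
    assert transactions.name == "Training dataset of user-item-rating triples"
    assert user_features.name == "User features"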
def _validate_parameters(self, learner: WideNDeepModel, test_data: Dataset, user_features: FeatureDataset = None,
                         item_features: FeatureDataset = None, **kwargs):
    super()._validate_parameters(learner, test_data, user_features=user_features, item_features=item_features)
    ErrorMapping.verify_number_of_columns_greater_than_or_equal_to(
        curr_column_count=test_data.column_size, required_column_count=2, arg_name=test_data.name)
def _check_features(self, features: FeatureDataset):
    """Check compatibility between the recorded feature metas and the given features.

    The two feature datasets are compatible if:
    1. The new feature dataset contains every feature name recorded in the old one.
    2. A feature present in both datasets has the same column type in each.
    """
    common_logger.info("Check features compatibility with existing feature metas")
    for _, feature_meta in self.feature_metas.items():
        name = feature_meta.name
        if name not in features.features:
            ErrorMapping.throw(ColumnNotFoundError(column_id=name, arg_name_missing_column=features.name))
        column_type = features.get_column_type(name)
        if column_type != feature_meta.type_:
            ErrorMapping.verify_element_type(type_=column_type, expected_type=feature_meta.type_,
                                             column_name=name, arg_name=features.name)
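# Hedged, standalone restatement of the compatibility rule documented above,
# using plain dicts in place of FeatureDataset and the recorded feature metas
# (all names below are invented for illustration):
def _demo_feature_compatibility(recorded_types: dict, new_types: dict) -> list:
    """Return a list of problems; an empty list means the datasets are compatible."""
    problems = []
    for name, recorded_type in recorded_types.items():
        if name not in new_types:
            problems.append(f"missing column: {name}")
        elif new_types[name] != recorded_type:
            problems.append(f"type mismatch on {name}: {new_types[name]} vs {recorded_type}")
    return problems
# _demo_feature_compatibility({"age": "NUMERIC"}, {"age": "STRING"})
# -> ["type mismatch on age: STRING vs NUMERIC"]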
def _validate_preprocessed_dataset(transactions: TransactionDataset, user_features: FeatureDataset,
                                   item_features: FeatureDataset):
    if transactions.row_size <= 0:
        ErrorMapping.throw(
            InvalidDatasetError(dataset1=transactions.name, reason="dataset does not have any valid samples"))
    if transactions.df.duplicated(
            subset=transactions.columns[[TRANSACTIONS_USER_COL, TRANSACTIONS_ITEM_COL]]).any():
        ErrorMapping.throw(MoreThanOneRatingError())
    if user_features is not None and any(user_features.df.duplicated(subset=user_features.ids.name)):
        ErrorMapping.throw(DuplicateFeatureDefinitionError())
    if item_features is not None and any(item_features.df.duplicated(subset=item_features.ids.name)):
        ErrorMapping.throw(DuplicateFeatureDefinitionError())
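# Hedged illustration of the more-than-one-rating check above: duplicates are
# detected on the (user, item) column pair, so the same user rating the same
# item twice trips the error even when the ratings differ. Column names are
# invented for this sketch.
def _demo_duplicate_user_item_pairs() -> bool:
    import pandas as pd
    df = pd.DataFrame({"user": ["u1", "u1", "u2"],
                       "item": ["i1", "i1", "i1"],
                       "rating": [5, 3, 4]})
    return df.duplicated(subset=["user", "item"]).any()  # True: ("u1", "i1") repeats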
def _validate_datasets(transactions: TransactionDataset, user_features: FeatureDataset = None,
                       item_features: FeatureDataset = None):
    ErrorMapping.verify_number_of_columns_equal_to(
        curr_column_count=transactions.column_size, required_column_count=3, arg_name=transactions.name)
    ErrorMapping.verify_number_of_rows_greater_than_or_equal_to(
        curr_row_count=transactions.row_size, required_row_count=1, arg_name=transactions.name)
    ErrorMapping.verify_element_type(type_=transactions.get_column_type(TRANSACTIONS_RATING_COL),
                                     expected_type=ColumnTypeName.NUMERIC,
                                     column_name=transactions.ratings.name, arg_name=transactions.name)
    if user_features is not None:
        TrainWideAndDeepRecommenderModule._validate_feature_dataset(user_features)
    if item_features is not None:
        TrainWideAndDeepRecommenderModule._validate_feature_dataset(item_features)
def _validate_parameters(self, learner: WideNDeepModel, test_data: Dataset, user_features: FeatureDataset = None,
                         item_features: FeatureDataset = None, **kwargs):
    ErrorMapping.verify_not_null_or_empty(x=learner, name=WideNDeepModel.MODEL_NAME)
    ErrorMapping.verify_number_of_rows_greater_than_or_equal_to(
        curr_row_count=test_data.row_size, required_row_count=1, arg_name=test_data.name)
    # Scoring data may carry at most user, item, and rating columns.
    ErrorMapping.verify_number_of_columns_less_than_or_equal_to(
        curr_column_count=test_data.column_size, required_column_count=3, arg_name=test_data.name)
    if user_features is not None:
        self._validate_feature_dataset(user_features)
    if item_features is not None:
        self._validate_feature_dataset(item_features)
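# Hedged sketch of the validation order the methods above imply for a scoring
# module: structural parameter checks first, then content checks on the
# preprocessed feature datasets. The module wiring below is assumed, not taken
# from this file.
def _demo_validation_flow(module, learner, test_data, user_features=None, item_features=None):
    module._validate_parameters(learner, test_data,
                                user_features=user_features, item_features=item_features)
    module._validate_preprocessed_dataset(user_features, item_features)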