def _validate_parameters(self,
                         learner: WideNDeepModel,
                         test_data: Dataset,
                         user_features: FeatureDataset = None,
                         item_features: FeatureDataset = None,
                         **kwargs):
    """Validate the inputs, including the training transactions dataset.

    Runs the parent class validation first, then checks through
    ``ErrorMapping`` that the training transactions are present, have at
    least one row and between two and three columns, and that the test
    data has at least two columns.

    Args:
        learner: model forwarded to the parent validation.
        test_data: dataset forwarded to the parent validation; must have
            at least two columns.
        user_features: optional user features, forwarded to the parent.
        item_features: optional item features, forwarded to the parent.
        **kwargs: expected to carry ``training_transactions``.
    """
    # A missing keyword is treated like an explicit ``None`` so it reaches the
    # null-or-empty check below instead of surfacing as a bare KeyError.
    training_transactions = kwargs.get("training_transactions")
    super()._validate_parameters(learner,
                                 test_data,
                                 user_features=user_features,
                                 item_features=item_features)
    ErrorMapping.verify_not_null_or_empty(training_transactions,
                                          name="Training data")
    ErrorMapping.verify_number_of_rows_greater_than_or_equal_to(
        curr_row_count=training_transactions.row_size,
        required_row_count=1,
        arg_name=training_transactions.name)
    # Column count of the training transactions must lie in [2, 3].
    ErrorMapping.verify_number_of_columns_less_than_or_equal_to(
        curr_column_count=training_transactions.column_size,
        required_column_count=3,
        arg_name=training_transactions.name)
    ErrorMapping.verify_number_of_columns_greater_than_or_equal_to(
        curr_column_count=training_transactions.column_size,
        required_column_count=2,
        arg_name=training_transactions.name)
    ErrorMapping.verify_number_of_columns_greater_than_or_equal_to(
        curr_column_count=test_data.column_size,
        required_column_count=2,
        arg_name=test_data.name)
예제 #2
0
 def _validate_feature_dataset(dataset: FeatureDataset):
     """Check the shape of a feature dataset, then its column types.

     The dataset must have at least two columns and at least one row; the
     column type check is delegated to
     ``TrainWideAndDeepRecommenderModule._validate_features_column_type``.
     """
     # NOTE(review): two columns presumably means an id column plus at least
     # one feature column - confirm against FeatureDataset.
     min_columns, min_rows = 2, 1
     ErrorMapping.verify_number_of_columns_greater_than_or_equal_to(
         curr_column_count=dataset.column_size,
         required_column_count=min_columns,
         arg_name=dataset.name,
     )
     ErrorMapping.verify_number_of_rows_greater_than_or_equal_to(
         curr_row_count=dataset.row_size,
         required_row_count=min_rows,
         arg_name=dataset.name,
     )
     TrainWideAndDeepRecommenderModule._validate_features_column_type(dataset)
예제 #3
0
 def _validate_preprocessed_dataset(user_features: FeatureDataset,
                                    item_features: FeatureDataset):
     """Reject feature datasets that define the same id more than once.

     Each dataset that is not ``None`` is checked for duplicated values in
     its id column; a ``DuplicateFeatureDefinitionError`` is reported through
     ``ErrorMapping.throw`` when any are found. User features are checked
     before item features.

     Args:
         user_features: user feature dataset, or ``None`` to skip the check.
         item_features: item feature dataset, or ``None`` to skip the check.
     """
     for features in (user_features, item_features):
         if features is None:
             continue
         # ``.any()`` reduces the boolean Series inside pandas instead of
         # iterating it element by element with the builtin ``any``.
         if features.df.duplicated(subset=features.ids.name).any():
             ErrorMapping.throw(DuplicateFeatureDefinitionError())
예제 #4
0
 def _validate_features_type(dataset: FeatureDataset):
     """Report every column of ``dataset`` whose type is ``ColumnTypeName.NAN``.

     Each offending column is passed to ``ErrorMapping.throw`` wrapped in an
     ``InvalidColumnTypeError`` carrying the column type, the column name and
     the dataset name.
     """
     for column_name in dataset.columns:
         column_type = dataset.get_column_type(column_name)
         if column_type == ColumnTypeName.NAN:
             invalid_type_error = InvalidColumnTypeError(col_type=column_type,
                                                         col_name=column_name,
                                                         arg_name=dataset.name)
             ErrorMapping.throw(invalid_type_error)
예제 #5
0
 def _validate_feature_dataset(self, dataset: FeatureDataset):
     """Check the shape of a feature dataset, then its column types.

     The dataset must have at least two columns and at least one row; the
     column type check is delegated to ``self._validate_features_type``.
     """
     min_columns, min_rows = 2, 1
     ErrorMapping.verify_number_of_columns_greater_than_or_equal_to(curr_column_count=dataset.column_size,
                                                                    required_column_count=min_columns,
                                                                    arg_name=dataset.name)
     ErrorMapping.verify_number_of_rows_greater_than_or_equal_to(curr_row_count=dataset.row_size,
                                                                 required_row_count=min_rows,
                                                                 arg_name=dataset.name)
     self._validate_features_type(dataset)
예제 #6
0
 def set_inputs_name(transactions: TransactionDataset,
                     user_features: FeatureDataset = None,
                     item_features: FeatureDataset = None):
     """Assign the display names of the module inputs to the datasets.

     ``transactions`` is required: when it is ``None`` it is handed to
     ``ErrorMapping.verify_not_null_or_empty`` together with its display
     name. The two feature datasets are optional and are only renamed when
     they are provided.
     """
     transactions_label = "Training dataset of user-item-rating triples"
     if transactions is None:
         ErrorMapping.verify_not_null_or_empty(x=transactions, name=transactions_label)
     else:
         transactions.name = transactions_label

     optional_inputs = (
         (user_features, "User features"),
         (item_features, "Item features"),
     )
     for features, label in optional_inputs:
         if features is not None:
             features.name = label
 def _validate_parameters(self,
                          learner: WideNDeepModel,
                          test_data: Dataset,
                          user_features: FeatureDataset = None,
                          item_features: FeatureDataset = None,
                          **kwargs):
     """Run the parent validation, then require two or more test data columns.

     ``kwargs`` is accepted for signature compatibility and is not read here.
     """
     super()._validate_parameters(learner, test_data, user_features=user_features, item_features=item_features)
     min_test_columns = 2
     ErrorMapping.verify_number_of_columns_greater_than_or_equal_to(curr_column_count=test_data.column_size,
                                                                    required_column_count=min_test_columns,
                                                                    arg_name=test_data.name)
    def _check_features(self, features: FeatureDataset):
        """Check compatibility between the recorded feature metas and the given features.

        The given feature dataset is compatible when:
        1. It contains every feature name recorded in ``self.feature_metas``.
        2. Each of those features has the same column type as the recorded one.

        A missing feature is reported as a ``ColumnNotFoundError`` through
        ``ErrorMapping.throw``; a type mismatch is handed to
        ``ErrorMapping.verify_element_type``.

        Args:
            features: feature dataset to compare against the recorded metas.
        """
        common_logger.info("Check features compatibility with existing feature metas")
        # Only the meta objects are needed; the mapping keys are not used.
        for feature_meta in self.feature_metas.values():
            name = feature_meta.name
            if name not in features.features:
                ErrorMapping.throw(ColumnNotFoundError(column_id=name, arg_name_missing_column=features.name))
            # Look the column type up once and reuse it for both the
            # comparison and the error report.
            column_type = features.get_column_type(name)
            if column_type != feature_meta.type_:
                ErrorMapping.verify_element_type(type_=column_type, expected_type=feature_meta.type_,
                                                 column_name=name, arg_name=features.name)
예제 #9
0
    def _validate_preprocessed_dataset(transactions: TransactionDataset, user_features: FeatureDataset,
                                       item_features: FeatureDataset):
        """Validate the datasets after preprocessing.

        Checks, in order:
        1. ``transactions`` has at least one row, otherwise an
           ``InvalidDatasetError`` is reported.
        2. No (user, item) pair occurs more than once in ``transactions``,
           otherwise a ``MoreThanOneRatingError`` is reported.
        3. Neither feature dataset (when provided) has duplicated ids,
           otherwise a ``DuplicateFeatureDefinitionError`` is reported.

        All errors are reported through ``ErrorMapping.throw``.

        Args:
            transactions: preprocessed transactions dataset.
            user_features: user feature dataset, or ``None`` to skip its check.
            item_features: item feature dataset, or ``None`` to skip its check.
        """
        if transactions.row_size <= 0:
            ErrorMapping.throw(
                InvalidDatasetError(dataset1=transactions.name, reason="dataset does not have any valid samples"))

        user_item_columns = transactions.columns[[TRANSACTIONS_USER_COL, TRANSACTIONS_ITEM_COL]]
        if transactions.df.duplicated(subset=user_item_columns).any():
            ErrorMapping.throw(MoreThanOneRatingError())

        # Same vectorised ``.any()`` reduction as the transactions check above,
        # applied to each optional feature dataset in turn.
        for features in (user_features, item_features):
            if features is not None and features.df.duplicated(subset=features.ids.name).any():
                ErrorMapping.throw(DuplicateFeatureDefinitionError())
예제 #10
0
 def _validate_datasets(transactions: TransactionDataset,
                        user_features: FeatureDataset = None,
                        item_features: FeatureDataset = None):
     """Validate the raw training inputs.

     The transactions dataset must have exactly three columns, at least one
     row, and a numeric rating column. Each feature dataset that is provided
     is then checked by
     ``TrainWideAndDeepRecommenderModule._validate_feature_dataset``, user
     features first.
     """
     ErrorMapping.verify_number_of_columns_equal_to(
         curr_column_count=transactions.column_size,
         required_column_count=3,
         arg_name=transactions.name)
     ErrorMapping.verify_number_of_rows_greater_than_or_equal_to(
         curr_row_count=transactions.row_size,
         required_row_count=1,
         arg_name=transactions.name)

     rating_type = transactions.get_column_type(TRANSACTIONS_RATING_COL)
     ErrorMapping.verify_element_type(
         type_=rating_type,
         expected_type=ColumnTypeName.NUMERIC,
         column_name=transactions.ratings.name,
         arg_name=transactions.name)

     for features in (user_features, item_features):
         if features is not None:
             TrainWideAndDeepRecommenderModule._validate_feature_dataset(features)
예제 #11
0
 def _validate_parameters(self,
                          learner: WideNDeepModel,
                          test_data: Dataset,
                          user_features: FeatureDataset = None,
                          item_features: FeatureDataset = None,
                          **kwargs):
     """Validate the learner, the test data and the optional feature datasets.

     The learner must not be null or empty, and the test data must have at
     least one row and at most three columns. Each feature dataset that is
     provided is checked with ``self._validate_feature_dataset``, user
     features first. ``kwargs`` is not read here.
     """
     ErrorMapping.verify_not_null_or_empty(x=learner, name=WideNDeepModel.MODEL_NAME)

     min_rows, max_columns = 1, 3
     ErrorMapping.verify_number_of_rows_greater_than_or_equal_to(curr_row_count=test_data.row_size,
                                                                 required_row_count=min_rows,
                                                                 arg_name=test_data.name)
     ErrorMapping.verify_number_of_columns_less_than_or_equal_to(curr_column_count=test_data.column_size,
                                                                 required_column_count=max_columns,
                                                                 arg_name=test_data.name)

     for features in (user_features, item_features):
         if features is not None:
             self._validate_feature_dataset(features)