class PreprocessedData:
    """
    This class combines the feature engineering and feature selection
    class to one to preprocess the data.
    It takes in the cleaned data and the y_column_name(ratings)
    """
    def __init__(self, data, y_column_name):
        self.data = data
        self.y_column_name = y_column_name
        self.feature_engineering = FeatureEngineering(data, y_column_name)
        self.feature_selection = FeatureSelection(data, y_column_name)

    def preprocess_my_data(self, num_of_features_to_select):
        """
        This method preprocesses the cleaned data and performs
        feature selection to select best n-features
        :param num_of_features_to_select: n-best features of the model
        :return: Full preprocesse data with n-selected features
        """
        self.data = self.feature_engineering.input_rare_categorical()
        self.data = self.feature_engineering.encode_categorical_features()
        self.data = self.feature_engineering.scale_features()
        self.data = self.feature_selection.perform_feature_selection(
            num_of_features_to_select)
        return self.data
class ProcessedData:
    def __init__(self, train, test, id_column, y_column_name):
        self.train = train
        self.test = test
        self.y_column_name = y_column_name
        self.id_column = id_column
        self.data = pd.concat([self.train, self.test], ignore_index=True)
        self.feature_engineering = FeatureEngineering(self.train, self.test,
                                                      self.id_column,
                                                      self.y_column_name)
        self.feature_selection = FeatureSelection(self.train, self.test,
                                                  self.id_column,
                                                  self.y_column_name)

    def preprocess_my_data(self, num_of_features_to_select):
        self.data = self.feature_engineering.scale_features()
        self.data = self.feature_selection.perform_feature_selection(
            num_of_features_to_select)
        return self.data
Ejemplo n.º 3
0
class PreprocessedData:
    def __init__(self, train, test, id_column, y_column_name):
        self.train = train
        self.test = test
        self.y_column_name = y_column_name
        self.id_column = id_column
        self.data = pd.concat([train, test], ignore_index=True)
        self.feature_engineering = FeatureEngineering(train, test, id_column,
                                                      y_column_name)
        self.feature_selection = FeatureSelection(train, test, id_column,
                                                  y_column_name)

    def preprocess_my_data(self, num_of_features_to_select):
        self.data = self.feature_engineering.fill_na_categorical()
        self.data = self.feature_engineering.fill_na_numerical()
        self.data = self.feature_engineering.input_rare_categorical()
        self.data = self.feature_engineering.label_encoder()
        self.data = self.feature_engineering.get_scale_features()
        self.data = self.feature_selection.perform_feature_selection(
            num_of_features_to_select)
        return self.data