def fit(self, pipeline_config, X, Y, dataset_info):
        """One-hot encode the categorical columns of X and transform Y.

        Args:
            pipeline_config: Pipeline configuration; not read by this method.
            X: Feature matrix. It is only encoded when at least one feature
                is flagged as categorical and ``dataset_info.is_sparse`` is
                falsy; otherwise it is returned unchanged.
            Y: Targets, forwarded to ``self.complete_y_tranformation``.
            dataset_info: Object exposing ``categorical_features`` (one
                truthy/falsy flag per column of X, or ``None`` for "no
                categorical columns") and ``is_sparse``. Its
                ``categorical_features`` attribute is reset to ``None``
                before returning.

        Returns:
            dict with the keys ``'X'``, ``'one_hot_encoder'`` (the
            ``ColumnTransformer``, fitted only if X was encoded), ``'Y'``,
            ``'y_one_hot_encoder'`` and ``'dataset_info'``.
        """
        categorical_features = dataset_info.categorical_features

        # This method nulls dataset_info.categorical_features on exit, so a
        # dataset_info that already went through it carries None here.
        # Treat that as "nothing to encode" instead of failing in enumerate().
        if categorical_features is None:
            categorical_columns = []
        else:
            categorical_columns = [
                index
                for index, is_categorical in enumerate(categorical_features)
                if is_categorical
            ]

        # scikit-learn 1.2 renamed ``sparse`` to ``sparse_output`` and 1.4
        # removed the old keyword; try the new name first and fall back for
        # older releases that do not know it.
        try:
            ohe = OneHotEncoder(categories="auto",
                                sparse_output=False,
                                handle_unknown="ignore")
        except TypeError:
            ohe = OneHotEncoder(categories="auto",
                                sparse=False,
                                handle_unknown="ignore")

        encoder = ColumnTransformer(
            transformers=[("ohe", ohe, categorical_columns)],
            remainder="passthrough")
        # Defaults so both attributes exist even when nothing gets fitted.
        encoder.categories_ = np.array([])
        encoder.categorical_features = categorical_features

        if categorical_columns and not dataset_info.is_sparse:
            # encode X
            X = encoder.fit_transform(X)
            # Expose the categories learned by the inner OneHotEncoder.
            encoder.categories_ = encoder.transformers_[0][1].categories_

        # Y to matrix
        Y, y_encoder = self.complete_y_tranformation(Y)

        # Clear the categorical flags on dataset_info for whatever consumes
        # the returned dict.
        dataset_info.categorical_features = None
        return {
            'X': X,
            'one_hot_encoder': encoder,
            'Y': Y,
            'y_one_hot_encoder': y_encoder,
            'dataset_info': dataset_info
        }
Example #2
0
    def fit(self, pipeline_config, X_train, X_valid, Y_train, Y_valid,
            categorical_features):
        """One-hot encode categorical feature columns and prepare the targets.

        Args:
            pipeline_config: Pipeline configuration; not read by this method.
            X_train: Training features. Encoded only when at least one
                feature is flagged categorical and X_train is not a scipy
                sparse matrix; otherwise returned unchanged.
            X_valid: Validation features or ``None``; transformed with the
                encoder fitted on X_train whenever X_train is encoded.
            Y_train: Training targets; cast to float32 and reshaped to a
                column when one-dimensional.
            Y_valid: Validation targets or ``None``; treated like Y_train.
            categorical_features: One truthy/falsy flag per feature column,
                or ``None`` for "no categorical columns".

        Returns:
            dict with the keys ``'X_train'``, ``'X_valid'``,
            ``'one_hot_encoder'``, ``'Y_train'``, ``'Y_valid'``,
            ``'y_one_hot_encoder'`` (``None`` unless Y was encoded) and
            ``'categorical_features'`` (always ``None``).
        """

        def make_dense_encoder():
            # scikit-learn 1.2 renamed ``sparse`` to ``sparse_output`` and
            # 1.4 removed the old keyword; try the new name first and fall
            # back for older releases that do not know it.
            try:
                return OneHotEncoder(categories="auto",
                                     sparse_output=False,
                                     handle_unknown="ignore")
            except TypeError:
                return OneHotEncoder(categories="auto",
                                     sparse=False,
                                     handle_unknown="ignore")

        # This method returns 'categorical_features': None, so None may be
        # fed back in; treat it as "nothing to encode" instead of failing
        # in enumerate().
        if categorical_features is None:
            categorical_columns = []
        else:
            categorical_columns = [
                index
                for index, is_categorical in enumerate(categorical_features)
                if is_categorical
            ]

        encoder = ColumnTransformer(
            transformers=[("ohe", make_dense_encoder(), categorical_columns)],
            remainder="passthrough")
        # Defaults so both attributes exist even when nothing gets fitted.
        encoder.categories_ = np.array([])
        encoder.categorical_features = categorical_features

        if categorical_columns and not scipy.sparse.issparse(X_train):
            # encode X
            X_train = encoder.fit_transform(X_train)
            if X_valid is not None:
                X_valid = encoder.transform(X_valid)
            # Expose the categories learned by the inner OneHotEncoder.
            encoder.categories_ = encoder.transformers_[0][1].categories_

        # Y to matrix
        y_encoder = None
        Y_train = Y_train.astype(np.float32)
        if len(Y_train.shape) == 1:
            Y_train = Y_train.reshape(-1, 1)
        if Y_valid is not None:
            # Cast exactly like Y_train: the Y encoder learns its categories
            # from float32 values, so validation labels must be float32 as
            # well to be matched against them.
            Y_valid = Y_valid.astype(np.float32)
            if len(Y_valid.shape) == 1:
                Y_valid = Y_valid.reshape(-1, 1)

        # encode Y
        if self.encode_Y and not scipy.sparse.issparse(Y_train):
            y_encoder = make_dense_encoder()
            Y_train = y_encoder.fit_transform(Y_train)
            if Y_valid is not None:
                Y_valid = y_encoder.transform(Y_valid)

        return {
            'X_train': X_train,
            'X_valid': X_valid,
            'one_hot_encoder': encoder,
            'Y_train': Y_train,
            'Y_valid': Y_valid,
            'y_one_hot_encoder': y_encoder,
            'categorical_features': None
        }