X = X[self.raw_features].copy() X_pp = self.preprocessing_pipeline.transform(X) X_pp = pd.DataFrame(X_pp, columns=self.get_feature_names()) return X_pp def get_feature_names(self): """ Feature names after preprocessing. Replicates the get_feature_names function in the sklearn Transformer classes. """ return self.raw_features if __name__ == "__main__": from preprocessing import PreProcessor # noqa # Load data db_config = db.get_config() train = db.load(*db_config, 'raw_train') X_train = train.drop('SalePrice', axis=1) # Fit and transform training data pp = PreProcessor() X_train_pp = pp.fit_transform(X_train) train_pp = X_train_pp.assign(SalePrice=train['SalePrice']) # Save preprocessed data and fitted preprocessor db.save(train_pp, *db_config, 'processed_train') joblib.dump(pp, os.path.join(DIR, '../pickle/PreProcessor.pkl'))