def test_normalize(self):
    """Run the preprocessor with ``normalize=True``.

    The test makes no assertions; it passes as long as constructing the
    ``Preprocessor`` and calling ``run`` does not raise.
    """
    zip_columns = ["merchantZip"]
    pipeline = Preprocessor(
        COLS_NUM,
        zip_columns,
        PATH_READ_X,
        PATH_READ_y,
        DIR_EDA,
    )
    pipeline.run(normalize=True)
def test_run(self):
    """Run the preprocessor with hot-encoding and normalization both off.

    The test makes no assertions; it passes as long as constructing the
    ``Preprocessor`` and calling ``run`` does not raise.
    """
    zip_columns = ["merchantZip"]
    run_options = {"hot_encode": False, "normalize": False}
    pipeline = Preprocessor(
        COLS_NUM,
        zip_columns,
        PATH_READ_X,
        PATH_READ_y,
        DIR_EDA,
    )
    pipeline.run(**run_options)
def test_one_hot_encoding(self):
    """Run the preprocessor with ``hot_encode=True``.

    The test makes no assertions; it passes as long as constructing the
    ``Preprocessor`` and calling ``run`` does not raise.
    """
    zip_columns = ["merchantZip"]
    pipeline = Preprocessor(
        COLS_NUM,
        zip_columns,
        PATH_READ_X,
        PATH_READ_y,
        DIR_EDA,
    )
    pipeline.run(hot_encode=True)
# Build the EDA runner and the preprocessor from the same input paths and
# output directory, then run the EDA step once before any model is trained.
eda_runner = EDA(COLS_NUM, PATH_READ_X, PATH_READ_y, DIR_EDA)
preprocessor = Preprocessor(COLS_NUM, ["merchantZip"], PATH_READ_X, PATH_READ_y, DIR_EDA)
eda_runner.run()

# Log results
# One list per metric plus one for the model name. Only "accuracy" is
# appended to in the lines visible here.
# NOTE(review): the remaining lists are presumably filled further down -- confirm.
results = {
    "accuracy": [],
    "f1": [],
    "precision": [],
    "recall": [],
    "model": []
}

# Iterate over different models
# Each pipeline_dict entry supplies "preprocessing_params", "model" and
# "model_params" (the keys read below).
for model_name, model_dict in pipeline_dict.items():
    # Preprocessing is re-run for every model with that model's own options.
    data = preprocessor.run(**model_dict["preprocessing_params"])

    # Define X and y
    # "fraud" is the target; it and the COLS_OTHER columns are dropped from
    # the feature matrix.
    y = data["fraud"]
    X = data.drop(columns=COLS_OTHER + ["fraud"])
    # test_size=0.4 holds out 40% of the rows for evaluation.
    # NOTE(review): no random_state is passed, so the split presumably
    # differs between runs -- confirm whether reproducibility is required.
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.4)

    # Train and predict
    # Grid search over model_dict["model_params"] is fitted on the training
    # split only; predictions are made on the held-out split.
    model = model_dict["model"]
    clf = GridSearchCV(model, model_dict["model_params"])
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Get scores
    results["accuracy"].append(accuracy_score(y_test, y_pred))