Example #1
0
 def test_normalize(self):
     """Run the preprocessor with ``normalize=True``.

     Smoke test: there are no assertions, so it passes as long as
     constructing the ``Preprocessor`` and calling ``run`` do not raise.
     """
     zip_columns = ["merchantZip"]
     pipeline = Preprocessor(
         COLS_NUM,
         zip_columns,
         PATH_READ_X,
         PATH_READ_y,
         DIR_EDA,
     )
     # Only the normalize flag is set; every other option keeps its default.
     pipeline.run(normalize=True)
Example #2
0
 def test_run(self):
     """Run the preprocessor with both optional steps switched off.

     Smoke test: there are no assertions, so it passes as long as
     constructing the ``Preprocessor`` and calling ``run`` do not raise.
     """
     zip_columns = ["merchantZip"]
     pipeline = Preprocessor(
         COLS_NUM,
         zip_columns,
         PATH_READ_X,
         PATH_READ_y,
         DIR_EDA,
     )
     # Explicitly disable one-hot encoding and normalization.
     pipeline.run(hot_encode=False, normalize=False)
Example #3
0
 def test_one_hot_encoding(self):
     """Run the preprocessor with ``hot_encode=True``.

     Smoke test: there are no assertions, so it passes as long as
     constructing the ``Preprocessor`` and calling ``run`` do not raise.
     """
     zip_columns = ["merchantZip"]
     pipeline = Preprocessor(
         COLS_NUM,
         zip_columns,
         PATH_READ_X,
         PATH_READ_y,
         DIR_EDA,
     )
     # Only the hot_encode flag is set; every other option keeps its default.
     pipeline.run(hot_encode=True)
Example #4
0
    eda_runner = EDA(COLS_NUM, PATH_READ_X, PATH_READ_y, DIR_EDA)
    preprocessor = Preprocessor(COLS_NUM, ["merchantZip"], PATH_READ_X,
                                PATH_READ_y, DIR_EDA)
    eda_runner.run()

    # Log results
    results = {
        "accuracy": [],
        "f1": [],
        "precision": [],
        "recall": [],
        "model": []
    }
    # Iterate over different models
    for model_name, model_dict in pipeline_dict.items():
        data = preprocessor.run(**model_dict["preprocessing_params"])

        # Define X and y
        y = data["fraud"]
        X = data.drop(columns=COLS_OTHER + ["fraud"])
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=0.4)

        # Train and predict
        model = model_dict["model"]
        clf = GridSearchCV(model, model_dict["model_params"])
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # Get scores
        results["accuracy"].append(accuracy_score(y_test, y_pred))