예제 #1
0
        "model": XGBRegressor(),
        "params": {
            "gamma": np.random.uniform(low=0.01, high=0.05, size=10),
            "max_depth": [4, 5, 6],
            "min_child_weight": [4, 5, 6],
            "reg_alpha": [1e-5, 1e-2, 0.1, 1, 10, 100]
        }
    }
}

# Script entry point: load the data, preprocess it, run the exploratory
# analysis steps, then start building the model inputs.
if __name__ == "__main__":
    # NOTE(review): `df` is indexed by a list of column names further down,
    # so it is presumably a pandas DataFrame -- confirm against
    # load_sol_challenge().
    df = load_sol_challenge()
    # Data Preprocessing
    preprocessor = PreProcessor()
    # Each step takes `df` and returns the value that replaces it.
    # Presumably str_to_float converts the columns named in `cols_with_str`
    # to floats and remove_nans drops/cleans missing values -- verify in
    # PreProcessor.
    df = preprocessor.str_to_float(df, cols_with_str)
    df = preprocessor.remove_nans(df)

    # EDA
    # Data Distribution
    # Configured with the columns to analyse and PATH_RESULTS_EDA_DIST
    # (presumably the output location -- TODO confirm); outlier ignoring is
    # explicitly switched off.
    data_distribution = DataDistribution(cols_to_analyse,
                                         PATH_RESULTS_EDA_DIST,
                                         ignore_outliers=False)
    data_distribution.run(df)
    # Feature Correlation
    # Same columns as above, with PATH_RESULTS_EDA_CORR and a 9x9 `figsize`
    # (presumably the figure size of the generated plot -- TODO confirm).
    feature_correlation = FeatureCorrelation(cols_to_analyse,
                                             PATH_RESULTS_EDA_CORR,
                                             figsize=(9, 9))
    feature_correlation.run(df)

    # Get independent and dependent variables
    # X: the `X_names_num` columns of `df`, converted to a NumPy array.
    X = np.asarray(df[X_names_num])