Example #1
# Shared imports for all five examples; the module paths are assumed from the
# kxy / kxy_datasets package layout and may need adjusting.
import kxy  # noqa: F401 -- registers the df.kxy pandas accessor
from kxy.learning import (get_sklearn_learner, get_xgboost_learner,
                          get_lightgbm_learner_learning_api)
from kxy.misc import RFE
from kxy_datasets.regressions import Abalone
from kxy_datasets.classifications import BankNote


def test_linear_regression():
    regressor_cls = get_sklearn_learner(
        'sklearn.linear_model.LinearRegression')
    from warnings import simplefilter
    from sklearn.exceptions import ConvergenceWarning
    simplefilter("ignore", category=ConvergenceWarning)

    fs = RFE(regressor_cls)

    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Feature generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    # Ask RFE to keep all but 5 of the candidate features (at least 1).
    n_vars = max(x_df.shape[1] - 5, 1)
    m = fs.fit(x_df, y_df, n_vars)

    # Assertions
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == ['Height.ABS(* - MEDIAN(*))', 'Height.ABS(* - MEAN(*))', 'Viscera weight.ABS(* - MEDIAN(*))', \
     'Shell weight', 'Diameter.ABS(* - MEAN(*))', 'Shell weight.ABS(* - MEDIAN(*))', 'Shucked weight', 'Viscera weight.ABS(* - MEAN(*))', \
     'Shell weight.ABS(* - MEAN(*))', 'Diameter.ABS(* - MEDIAN(*))', 'Diameter', 'Height', 'Length.ABS(* - Q25(*))', \
     'Shucked weight.ABS(* - Q75(*))', 'Height.ABS(* - Q75(*))', 'Viscera weight', 'Shell weight.ABS(* - Q25(*))', 'Diameter.ABS(* - Q75(*))', \
     'Shucked weight.ABS(* - MEDIAN(*))', 'Whole weight.ABS(* - Q25(*))', 'Length', 'Shucked weight.ABS(* - MEAN(*))', 'Viscera weight.ABS(* - Q75(*))', \
     'Shucked weight.ABS(* - Q25(*))', 'Whole weight', 'Whole weight.ABS(* - MEAN(*))', 'Viscera weight.ABS(* - Q25(*))', 'Whole weight.ABS(* - Q75(*))', \
     'Whole weight.ABS(* - MEDIAN(*))', 'Length.ABS(* - MEDIAN(*))', 'Length.ABS(* - Q75(*))', 'Length.ABS(* - MEAN(*))', 'Shell weight.ABS(* - Q75(*))']
Example #2
def test_lightgbm_classifier():
    # Classification
    params = {
        'objective': 'binary',
        'metric': ['auc', 'binary_logloss'],
        'boosting_type': 'gbdt',
    }
    classifier_cls = get_lightgbm_learner_learning_api(
        params, num_boost_round=10000, early_stopping_rounds=5,
        verbose_eval=50, split_random_seed=42)
    fs = RFE(classifier_cls)

    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Feature generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    n_vars = max(x_df.shape[1] - 5, 1)
    m = fs.fit(x_df, y_df, n_vars)

    # Assertions
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == ['Variance', 'Kurtosis', 'Skewness.ABS(* - MEAN(*))', 'Skewness', 'Variance.ABS(* - MEAN(*))', \
     'Entropy', 'Variance.ABS(* - Q25(*))', 'Kurtosis.ABS(* - MEDIAN(*))', 'Kurtosis.ABS(* - Q75(*))', 'Skewness.ABS(* - MEDIAN(*))', \
     'Kurtosis.ABS(* - Q25(*))', 'Kurtosis.ABS(* - MEAN(*))', 'Variance.ABS(* - MEDIAN(*))', 'Entropy.ABS(* - MEDIAN(*))', \
     'Entropy.ABS(* - Q25(*))']
Example #3
def test_xgboost_regression():
    regressor_cls = get_xgboost_learner('xgboost.XGBRegressor', random_state=0)

    fs = RFE(regressor_cls)

    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Feature generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    n_vars = max(x_df.shape[1] - 5, 1)
    m = fs.fit(x_df, y_df, n_vars)

    # Assertions
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == ['Shell weight', 'Sex_I', 'Shucked weight.ABS(* - Q25(*))', 'Shucked weight', 'Shucked weight.ABS(* - MEDIAN(*))', \
     'Shucked weight.ABS(* - MEAN(*))', 'Diameter.ABS(* - Q75(*))', 'Height.ABS(* - Q75(*))', 'Diameter.ABS(* - MEAN(*))', 'Diameter.ABS(* - Q25(*))', \
     'Whole weight.ABS(* - MEDIAN(*))', 'Viscera weight.ABS(* - Q75(*))', 'Sex_M', 'Height.ABS(* - MEAN(*))', 'Shucked weight.ABS(* - Q75(*))', \
     'Viscera weight.ABS(* - MEAN(*))', 'Height.ABS(* - Q25(*))', 'Whole weight.ABS(* - MEAN(*))', 'Shell weight.ABS(* - Q25(*))', 'Whole weight.ABS(* - Q25(*))', \
     'Length.ABS(* - MEAN(*))', 'Length.ABS(* - Q75(*))', 'Whole weight.ABS(* - Q75(*))', 'Diameter.ABS(* - MEDIAN(*))', 'Shell weight.ABS(* - Q75(*))', \
     'Shell weight.ABS(* - MEAN(*))', 'Shell weight.ABS(* - MEDIAN(*))', 'Length.ABS(* - MEDIAN(*))', 'Sex_F', 'Viscera weight', 'Whole weight', \
     'Length.ABS(* - Q25(*))', 'Viscera weight.ABS(* - MEDIAN(*))']
Example #4
def test_xgboost_classifier():
    # Binary classification
    classifier_cls = get_xgboost_learner('xgboost.XGBClassifier',
                                         use_label_encoder=False,
                                         eval_metric='logloss',
                                         learning_rate=0.1,
                                         max_depth=10)
    fs = RFE(classifier_cls)

    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Feature generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    n_vars = max(x_df.shape[1] - 5, 1)
    m = fs.fit(x_df, y_df, n_vars)

    # Assertions
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == ['Variance', 'Skewness', 'Kurtosis', 'Entropy', 'Skewness.ABS(* - MEDIAN(*))', \
     'Variance.ABS(* - MEAN(*))', 'Skewness.ABS(* - MEAN(*))', 'Kurtosis.ABS(* - MEDIAN(*))', 'Kurtosis.ABS(* - Q25(*))', \
     'Entropy.ABS(* - MEDIAN(*))', 'Skewness.ABS(* - Q25(*))', 'Entropy.ABS(* - MEAN(*))', 'Variance.ABS(* - Q25(*))', \
     'Kurtosis.ABS(* - MEAN(*))', 'Kurtosis.ABS(* - Q75(*))']
Example #5
def test_lightgbm_regression():
    lgbm_params = {
        'objective': 'rmse',
        'boosting_type': 'gbdt',
        'n_jobs': -1,
        'learning_rate': 0.1,
        'verbose': -1,
    }
    regressor_cls = get_lightgbm_learner_learning_api(
        lgbm_params, num_boost_round=2000, early_stopping_rounds=5,
        split_random_seed=0)

    fs = RFE(regressor_cls)

    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Feature generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    n_vars = max(x_df.shape[1] - 5, 1)
    m = fs.fit(x_df, y_df, n_vars)

    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == ['Shell weight', 'Shucked weight', 'Shucked weight.ABS(* - MEAN(*))', 'Whole weight.ABS(* - MEAN(*))', \
     'Viscera weight', 'Shucked weight.ABS(* - MEDIAN(*))', 'Shell weight.ABS(* - MEAN(*))', 'Whole weight', 'Shucked weight.ABS(* - Q75(*))', \
     'Height', 'Diameter', 'Sex_I', 'Shell weight.ABS(* - Q75(*))', 'Whole weight.ABS(* - Q25(*))', 'Shucked weight.ABS(* - Q25(*))', \
     'Length', 'Viscera weight.ABS(* - Q75(*))', 'Viscera weight.ABS(* - MEAN(*))', 'Diameter.ABS(* - MEAN(*))', 'Viscera weight.ABS(* - MEDIAN(*))', \
     'Whole weight.ABS(* - MEDIAN(*))', 'Height.ABS(* - Q75(*))', 'Diameter.ABS(* - Q75(*))', 'Sex_F', 'Whole weight.ABS(* - Q75(*))', \
     'Viscera weight.ABS(* - Q25(*))', 'Shell weight.ABS(* - Q25(*))', 'Length.ABS(* - MEDIAN(*))', 'Height.ABS(* - MEAN(*))', \
     'Shell weight.ABS(* - MEDIAN(*))', 'Diameter.ABS(* - Q25(*))', 'Length.ABS(* - MEAN(*))', 'Height.ABS(* - MEDIAN(*))']

    n_vars = 5
    m = fs.fit(x_df, y_df, n_vars)

    # Assertions
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == [
        'Shell weight', 'Shucked weight', 'Whole weight', 'Viscera weight',
        'Shucked weight.ABS(* - MEAN(*))'
    ]
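
The pattern is identical across the five tests: generate candidate features with df.kxy.generate_features, call RFE.fit with a target number of variables, then read fs.selected_variables. Below is a minimal, illustrative sketch of how those selected variables might be used downstream; it assumes the x_df / y_df frames and the fitted fs from one of the regression examples above, and it fits a plain scikit-learn LinearRegression, which is not part of the original tests.

# Hypothetical downstream usage (not part of the original tests).
from sklearn.linear_model import LinearRegression

# Keep only the columns RFE retained, then fit an ordinary regressor on them.
selected_x_df = x_df[fs.selected_variables]
model = LinearRegression()
model.fit(selected_x_df.values, y_df.values.ravel())
print(model.score(selected_x_df.values, y_df.values.ravel()))  # in-sample R^2

Note that the assertions in the tests compare against exact lists, so they pin both the set and the order of the selected variables for the given learner configuration.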