def test_factorizationmachinebinaryclassifier(self):
        """Train a factorization machine on 'infert' and check its output.

        Asserts that the accuracy on the held-out split exceeds 0.98 and
        that ``predict_proba`` returns a (62, 2) result.
        """
        # seed NumPy's global RNG before the data is split
        np.random.seed(0)

        frame = get_dataset("infert").as_df()

        # drop every ': ' sequence from the column names, then one-hot
        # encode the categorical 'education_str' column
        frame.columns = [name.replace(': ', '') for name in frame.columns]
        encoder = OneHotVectorizer() << 'education_str'
        frame = encoder.fit_transform(frame)

        # everything except 'case' is a feature; 'case' is the label
        features = frame.loc[:, frame.columns != 'case']
        label = frame['case']
        X_train, X_test, y_train, y_test = train_test_split(features, label)

        model = FactorizationMachineBinaryClassifier().fit(X_train, y_train)

        predicted = model.predict(X_test)
        accuracy = np.mean(y_test == list(predicted))
        assert_greater(accuracy, 0.98, "accuracy should be %s" % 0.98)

        probabilities = model.predict_proba(X_test)
        assert probabilities.shape == (62, 2)
Esempio n. 2
0
 def test_model_summary_not_supported_specific(self):
     """Check that ``summary()`` raises the expected ``TypeError``.

     Fits a pipeline ending in ``FactorizationMachineBinaryClassifier`` on
     the 'infert' data, then verifies that ``pipeline.summary()`` raises
     ``TypeError`` whose first argument is the "do not support the
     .summary() function" message.
     """
     path = get_dataset('infert').as_filepath()
     data = FileDataStream.read_csv(
         path,
         sep=',',
         names={
             0: 'row_num',
             5: 'case'
         })
     pipeline = Pipeline([
         OneHotVectorizer(columns={'edu': 'education'}),
         FactorizationMachineBinaryClassifier(
             feature=['induced', 'edu', 'parity'], label='case')
     ])
     pipeline.fit(data)

     # assertRaises fails the test when no TypeError is raised. The former
     # `else: assert False` branch is stripped under `python -O`, which
     # would let a missing exception pass silently.
     with self.assertRaises(TypeError) as caught:
         pipeline.summary()

     self.assertEqual(
         caught.exception.args[0],
         "One or more predictors in this pipeline do not support the .summary() function."
     )
Esempio n. 3
0
 def test_pass_decision_function_binary_with_pipeline(self):
     """Compare ``decfun_sum`` of a pipeline-wrapped learner to -32.618393.

     The comparison uses 5 decimal places.
     """
     wrapped_learner = Pipeline([FactorizationMachineBinaryClassifier()])
     observed = decfun_sum(wrapped_learner)
     assert_almost_equal(observed,
                         -32.618393,
                         decimal=5,
                         err_msg=invalid_decision_function_output)
Esempio n. 4
0
# Names of estimator checks grouped under NOBINARY_CHECKS, one per line.
# How this list is consumed is not visible in this part of the file.
NOBINARY_CHECKS = [
    'check_estimator_sparse_data',
    'check_dtype_object',
    'check_fit_score_takes_y',
    'check_fit2d_predict1d',
    'check_fit1d_1feature',
    'check_dont_overwrite_parameters',
    'check_supervised_y_2d',
    'check_estimators_fit_returns_self',
    'check_estimators_overwrite_params',
    'check_estimators_dtypes',
    'check_classifiers_classes',
    'check_classifiers_train',
]

INSTANCES = {
    'EnsembleClassifier':
    EnsembleClassifier(num_models=3),
    'EnsembleRegressor':
    EnsembleRegressor(num_models=3),
    'FactorizationMachineBinaryClassifier':
    FactorizationMachineBinaryClassifier(shuffle=False),
    'KMeansPlusPlus':
    KMeansPlusPlus(n_clusters=2),
    'LightGbmBinaryClassifier':
    LightGbmBinaryClassifier(minimum_example_count_per_group=1,
                             minimum_example_count_per_leaf=1),
    'LightGbmClassifier':
    LightGbmClassifier(minimum_example_count_per_group=1,
                       minimum_example_count_per_leaf=1),
    'LightGbmRegressor':
    LightGbmRegressor(minimum_example_count_per_group=1,
                      minimum_example_count_per_leaf=1),
    'LightGbmRanker':
    LightGbmRanker(minimum_example_count_per_group=1,
                   minimum_example_count_per_leaf=1),
    'NGramFeaturizer':
Esempio n. 5
0
    LogisticRegressionBinaryClassifier(),
    LogisticRegressionClassifier(),
    OnlineGradientDescentRegressor(),
    SgdBinaryClassifier(),
    # SymSgdBinaryClassifier(),
    OrdinaryLeastSquaresRegressor(),
    PoissonRegressionRegressor()
]

# Learner instances collected under ``learners_not_supported``.
# Note carried over from the repeated per-entry comments in this list:
# fix in nimbusml, needs to implement ICanGetSummaryAsIDataView
learners_not_supported = [
    NaiveBayesClassifier(),
    KMeansPlusPlus(),
    FactorizationMachineBinaryClassifier(),
    PcaAnomalyDetector(),
    # PcaTransformer(), # REVIEW: crashes
    GamBinaryClassifier(),
    GamRegressor(),
    LightGbmClassifier(),
    # LightGbmRanker(), # REVIEW: crashes
    OneVsRestClassifier(FastLinearBinaryClassifier()),
]

Esempio n. 6
0
                               numeric_dtype=numpy.float32,
                               names={
                                   0: 'row_num',
                                   5: 'case'
                               })
# show the first rows of the loaded data
print(data.head())
#    age  case education  induced  parity  pooled.stratum  row_num  ...
# 0  26.0   1.0    0-5yrs      1.0     6.0             3.0      1.0  ...
# 1  42.0   1.0    0-5yrs      1.0     1.0             1.0      2.0  ...
# 2  39.0   1.0    0-5yrs      2.0     6.0             4.0      3.0  ...
# 3  34.0   1.0    0-5yrs      2.0     4.0             2.0      4.0  ...
# 4  35.0   1.0   6-11yrs      1.0     3.0            32.0      5.0  ...

# define the training pipeline: one-hot encode 'education' into 'edu',
# then train the factorization machine on the selected feature columns
one_hot = OneHotVectorizer(columns={'edu': 'education'})
learner = FactorizationMachineBinaryClassifier(
    feature=['induced', 'edu', 'parity'],
    label='case')
pipeline = Pipeline([one_hot, learner])

# train on the data, then predict and evaluate on the same data
fitted = pipeline.fit(data)
metrics, predictions = fitted.test(data, output_scores=True)

# print predictions
print(predictions.head())
#   PredictedLabel  Probability     Score
# 0             0.0     0.370519 -0.529990
# 1             0.0     0.420984 -0.318737
# 2             0.0     0.364432 -0.556180
# 3             0.0     0.380421 -0.487761
# 4             0.0     0.365351 -0.552214

# print evaluation metrics
print(metrics)
Esempio n. 7
0
 def test_pass_decision_function_binary(self):
     """Compare ``decfun_sum`` of the bare learner to -30.2316.

     The comparison uses 4 decimal places.
     """
     observed = decfun_sum(FactorizationMachineBinaryClassifier())
     assert_almost_equal(observed,
                         -30.2316,
                         decimal=4,
                         err_msg=invalid_decision_function_output)
# FactorizationMachineBinaryClassifier
import numpy as np
from nimbusml.datasets import get_dataset
from nimbusml.decomposition import FactorizationMachineBinaryClassifier
from nimbusml.feature_extraction.categorical import OneHotVectorizer
from sklearn.model_selection import train_test_split

# use the built-in data set 'infert' to create test and train data
#   Unnamed: 0  education   age  parity  induced  case  spontaneous  stratum  \
# 0           1        0.0  26.0     6.0      1.0   1.0          2.0      1.0
# 1           2        0.0  42.0     1.0      1.0   1.0          0.0      2.0
#   pooled.stratum education_str
# 0             3.0        0-5yrs
# 1             1.0        0-5yrs
# seed NumPy's global RNG before the data is split
np.random.seed(0)

df = get_dataset("infert").as_df()

# drop every ': ' sequence from the column names, then one-hot encode the
# categorical 'education_str' column
df.columns = [name.replace(': ', '') for name in df.columns]
encoder = OneHotVectorizer() << 'education_str'
df = encoder.fit_transform(df)

# 'case' is the label; every other column is a feature
features = df.drop('case', axis=1)
label = df['case']
X_train, X_test, y_train, y_test = train_test_split(features, label)

# train the factorization machine and predict on the held-out rows
fm_classifier = FactorizationMachineBinaryClassifier().fit(X_train, y_train)
scores = fm_classifier.predict(X_test)

# evaluate the model
accuracy = np.mean(y_test == list(scores))
print('Accuracy:', accuracy)