コード例 #1
0
 def test_GamBinaryClassifier(self):
     """Train a default GamBinaryClassifier on 'infert' and check accuracy.

     The 'infert' data set is loaded as a data frame, the
     'education_str' column is one-hot encoded, and the frame is split
     into train and test parts with 'case' as the label. The test passes
     when the mean agreement between predictions and test labels is
     greater than 0.70.
     """
     # train_test_split is called without random_state, so the NumPy seed
     # is pinned before the split.
     np.random.seed(0)
     df = get_dataset("infert").as_df()
     # Drop every ': ' sequence from the column names.
     df.columns = [name.replace(': ', '') for name in df.columns]
     df = (OneHotVectorizer() << 'education_str').fit_transform(df)
     features = df.loc[:, df.columns != 'case']
     X_train, X_test, y_train, y_test = \
         train_test_split(features, df['case'])
     gam = GamBinaryClassifier().fit(X_train, y_train)
     predicted = gam.predict(X_test)
     # Element-wise comparison against the predictions materialised as a
     # plain list.
     acc = np.mean(y_test == list(predicted))
     assert_greater(acc, 0.70,
                    "accuracy should be greater than %s" % 0.70)
コード例 #2
0
    def test_ovr_accuracy(self):
        """Check one-vs-rest accuracy for a set of binary learners.

        Each learner is wrapped in a OneVsRestClassifier built with
        ``use_probabilities=True`` and handed to the ``accuracy`` helper
        (defined outside this block). The first 'Accuracy(micro-avg)'
        value of the returned metrics must be greater than 0.65.
        """
        binary_learners = [
            # TODO: BUG 231482 , why doesnt FM work
            # FactorizationMachineBinaryClassifier(),
            LogisticRegressionBinaryClassifier(number_of_threads=1),
            FastForestBinaryClassifier(
                minimum_example_count_per_leaf=1, number_of_threads=1),
            GamBinaryClassifier(number_of_threads=1),
            AveragedPerceptronBinaryClassifier(),
            FastTreesBinaryClassifier(
                minimum_example_count_per_leaf=1, number_of_threads=1),
            FastLinearBinaryClassifier(number_of_threads=1),
            SgdBinaryClassifier(number_of_threads=1),
            # SymSgdBinaryClassifier(number_of_threads=1),
        ]

        for learner in binary_learners:
            wrapped = OneVsRestClassifier(
                classifier=learner, use_probabilities=True)
            micro_accuracy = accuracy(wrapped)['Accuracy(micro-avg)'][0]
            # The learners differ widely in accuracy, hence the low
            # threshold; the call also exercises Pipeline + Ova + learner.
            failure_note = "{} accuracy is too low {}".format(
                learner.__class__, micro_accuracy)
            assert_greater(micro_accuracy, 0.65, failure_note)
コード例 #3
0
    def test_failing_predict_proba_called_with_use_probabilites_false(self):
        """Run the predict_proba check on OVR built without probabilities.

        Each binary learner is wrapped in a OneVsRestClassifier created
        with ``use_probabilities=False``. The test case, the wrapper and
        the learner are then passed to
        ``check_predict_proba_when_trained_with_use_probabilites_false``
        (defined outside this block), which performs the actual check.
        """
        base_learners = [
            # TODO: BUG 231482 , why doesnt FM work
            # FactorizationMachineBinaryClassifier(),
            LogisticRegressionBinaryClassifier(),
            FastForestBinaryClassifier(minimum_example_count_per_leaf=1),
            GamBinaryClassifier(),
            AveragedPerceptronBinaryClassifier(),
            FastTreesBinaryClassifier(minimum_example_count_per_leaf=1),
            LightGbmBinaryClassifier(),
            FastLinearBinaryClassifier(),
            SgdBinaryClassifier(),
            # SymSgdBinaryClassifier(),
        ]

        for learner in base_learners:
            raw_score_ovr = OneVsRestClassifier(
                classifier=learner, use_probabilities=False)
            check_predict_proba_when_trained_with_use_probabilites_false(
                self, raw_score_ovr, learner)
コード例 #4
0
    def test_failing_decision_function_called_with_use_probabilites_true(self):
        """Run the decision_function check on OVR built with probabilities.

        Each binary learner is wrapped in a OneVsRestClassifier created
        with ``use_probabilities=True``. The test case, the wrapper and
        the learner are then passed to
        ``check_decision_function_when_trained_with_use_probabilites_true``
        (defined outside this block), which performs the actual check.
        """
        clfs = [
            # TODO: BUG 231482 , why doesnt FM work
            # FactorizationMachineBinaryClassifier(),
            LogisticRegressionBinaryClassifier(),
            # The tree learners take minimum_example_count_per_leaf, the
            # same keyword the other tests in this file pass.
            FastForestBinaryClassifier(minimum_example_count_per_leaf=1),
            GamBinaryClassifier(),
            AveragedPerceptronBinaryClassifier(),
            FastTreesBinaryClassifier(minimum_example_count_per_leaf=1),
            LightGbmBinaryClassifier(),
            FastLinearBinaryClassifier(),
            SgdBinaryClassifier(),
            # SymSgdBinaryClassifier(),
        ]

        for clf in clfs:
            ovr = OneVsRestClassifier(classifier=clf, use_probabilities=True)
            check_decision_function_when_trained_with_use_probabilites_true(
                self, ovr, clf)
コード例 #5
0
    def test_decision_function_produces_distribution_not_sum_to_1(self):
        """Check that the raw-score average differs from 1.0.

        Each binary learner is wrapped in a OneVsRestClassifier created
        with ``use_probabilities=False``. The value returned by the
        ``decfun_average`` helper (defined outside this block) for that
        wrapper must not be equal to 1.0.
        """
        base_learners = [
            # TODO: BUG 231482 , why doesnt FM work
            # FactorizationMachineBinaryClassifier(),
            LogisticRegressionBinaryClassifier(),
            FastForestBinaryClassifier(minimum_example_count_per_leaf=1),
            GamBinaryClassifier(),
            AveragedPerceptronBinaryClassifier(),
            FastTreesBinaryClassifier(minimum_example_count_per_leaf=1),
            LightGbmBinaryClassifier(),
            FastLinearBinaryClassifier(),
            SgdBinaryClassifier(),
            # SymSgdBinaryClassifier(),
        ]

        for learner in base_learners:
            raw_score_ovr = OneVsRestClassifier(
                classifier=learner, use_probabilities=False)
            mean_score = decfun_average(raw_score_ovr)
            failure_note = (
                '{} raw scores should not sum to 1.0 over 3 classes'.format(
                    learner.__class__))
            assert_not_equal(mean_score, 1.0, failure_note)
コード例 #6
0
    def test_predict_proba_produces_distribution_sum_to_1(self):
        """Check that the probability average equals 1.0.

        Each binary learner is wrapped in a OneVsRestClassifier created
        with its default settings. The value returned by the
        ``proba_average`` helper (defined outside this block) for that
        wrapper must be equal to 1.0.
        """
        base_learners = [
            # TODO: BUG 231482 , why doesnt FM work
            # FactorizationMachineBinaryClassifier(),
            LogisticRegressionBinaryClassifier(),
            FastForestBinaryClassifier(minimum_example_count_per_leaf=1),
            GamBinaryClassifier(),
            AveragedPerceptronBinaryClassifier(),
            FastTreesBinaryClassifier(minimum_example_count_per_leaf=1),
            LightGbmBinaryClassifier(),
            FastLinearBinaryClassifier(),
            SgdBinaryClassifier(),
            # TODO: why symsgd does not sum to 1.0
            # SymSgdBinaryClassifier(),
        ]

        for learner in base_learners:
            default_ovr = OneVsRestClassifier(classifier=learner)
            mean_probability = proba_average(default_ovr)
            failure_note = (
                '{} probabilites {} do not sum to 1.0 over 3 classes'.format(
                    learner.__class__, mean_probability))
            assert_equal(mean_probability, 1.0, failure_note)
コード例 #7
0
# Load the 'infert' data set from its file path as a FileDataStream.
path = get_dataset('infert').as_filepath()
data = FileDataStream.read_csv(path)
print(data.head())
#   age  case education  induced  parity  ... row_num  spontaneous  ...
# 0   26     1    0-5yrs        1       6 ...       1            2  ...
# 1   42     1    0-5yrs        1       1 ...       2            0  ...
# 2   39     1    0-5yrs        2       6 ...       3            0  ...
# 3   34     1    0-5yrs        2       4 ...       4            0  ...
# 4   35     1   6-11yrs        1       3 ...       5            1  ...

# Training pipeline: one-hot encode 'education' into 'edu', then fit a
# GAM binary classifier on 'age' and 'edu' with 'case' as the label.
education_encoder = OneHotVectorizer(columns={'edu': 'education'})
gam_learner = GamBinaryClassifier(feature=['age', 'edu'], label='case')
pipeline = Pipeline([education_encoder, gam_learner])

# Fit on the data, then test on the same data and request the scores.
# TODO: Replace with CV
fitted_pipeline = pipeline.fit(data)
metrics, predictions = fitted_pipeline.test(data, output_scores=True)

# Show the first predictions.
print(predictions.head())
#   PredictedLabel     Score
# 0               0 -0.050461
# 1               0 -0.049737
# 2               0 -0.049737
# 3               0 -0.050461
# 4               0 -0.050552
# print evaluation metrics
コード例 #8
0
# GamBinaryClassifier
import numpy as np
from nimbusml.datasets import get_dataset
from nimbusml.ensemble import GamBinaryClassifier
from nimbusml.feature_extraction.categorical import OneHotVectorizer
from sklearn.model_selection import train_test_split

# Build train and test data from the built-in 'infert' data set:
#   Unnamed: 0  education   age  parity  induced  case  spontaneous  stratum  \
# 0           1        0.0  26.0     6.0      1.0   1.0          2.0      1.0
# 1           2        0.0  42.0     1.0      1.0   1.0          0.0      2.0
#   pooled.stratum education_str
# 0             3.0        0-5yrs
# 1             1.0        0-5yrs
np.random.seed(0)

df = get_dataset("infert").as_df()

# Drop every ': ' sequence from the column names, then one-hot encode the
# categorical 'education_str' column.
df.columns = [name.replace(': ', '') for name in df.columns]
education_encoder = OneHotVectorizer() << 'education_str'
df = education_encoder.fit_transform(df)

# 'case' is the label; every other column is a feature.
feature_frame = df.loc[:, df.columns != 'case']
X_train, X_test, y_train, y_test = train_test_split(feature_frame, df['case'])

gam_model = GamBinaryClassifier().fit(X_train, y_train)
scores = gam_model.predict(X_test)

# Evaluate the model: mean agreement between test labels and predictions.
mean_agreement = np.mean(y_test == list(scores))
print('Accuracy:', mean_agreement)
コード例 #9
0
    # SymSgdBinaryClassifier(),
    OrdinaryLeastSquaresRegressor(),
    PoissonRegressionRegressor()
]

# Learners collected as "not supported". Per the notes kept with this list,
# they need a fix in nimbusml: implementing ICanGetSummaryAsIDataView.
# The commented-out entries are excluded because they crash (see REVIEW).
learners_not_supported = [
    NaiveBayesClassifier(),
    KMeansPlusPlus(),
    FactorizationMachineBinaryClassifier(),
    PcaAnomalyDetector(),
    # PcaTransformer(),  # REVIEW: crashes
    GamBinaryClassifier(),
    GamRegressor(),
    LightGbmClassifier(),
    # LightGbmRanker(),  # REVIEW: crashes
    OneVsRestClassifier(FastLinearBinaryClassifier()),
]


class TestModelSummary(unittest.TestCase):
    def test_model_summary(self):
        for learner in learners:
            pipeline = Pipeline(