# AveragedPerceptronBinaryClassifier
import numpy as np
from nimbusml.datasets import get_dataset
from nimbusml.feature_extraction.categorical import OneHotVectorizer
from nimbusml.linear_model import AveragedPerceptronBinaryClassifier
from sklearn.model_selection import train_test_split

# use the built-in data set 'infert' to create test and train data
#   Unnamed: 0  education   age  parity  induced  case  spontaneous  stratum  \
# 0           1        0.0  26.0     6.0      1.0   1.0          2.0      1.0
# 1           2        0.0  42.0     1.0      1.0   1.0          0.0      2.0
#   pooled.stratum education_str
# 0             3.0        0-5yrs
# 1             1.0        0-5yrs
np.random.seed(0)

df = get_dataset("infert").as_df()

# remove : and ' ' from column names, and encode categorical column
df.columns = [i.replace(': ', '') for i in df.columns]
df = (OneHotVectorizer() << 'education_str').fit_transform(df)

X_train, X_test, y_train, y_test = \
    train_test_split(df.loc[:, df.columns != 'case'], df['case'])

lr = AveragedPerceptronBinaryClassifier().fit(X_train, y_train)
scores = lr.predict(X_test)

# Evaluate the model
print('Accuracy:', np.mean(y_test == [i for i in scores]))
Exemple #2
0
    def test_syntax_slots_wo_pipeline(self):
        # data
        df = get_dataset("infert").as_df()
        df = df.drop(['row_num', ], axis=1)
        X = df.drop('case', axis=1)
        y = df['case']

        # transform
        xf1 = OneHotVectorizer(columns=['age', 'parity', 'education_str'])
        X_xf1 = xf1.fit_transform(X, verbose=0)
        assert "age.21" in list(X_xf1.columns)

        # learner
        # (1 .a.)
        model = AveragedPerceptronBinaryClassifier()

        # (1. b)
        try:
            model = AveragedPerceptronBinaryClassifier(feature=['age'])
            model.fit(X_xf1, y, verbose=0)
            cont = True
            assert False
        except Exception as e:
            # does not work
            cont = False
            print(e)

        if cont:
            y_pred = model.predict(X_xf1)
            assert y_pred.shape == (248, 3)

        pipeline = Pipeline([
            OneHotVectorizer(columns=['age', 'parity', 'education_str']),
            AveragedPerceptronBinaryClassifier(feature='age')
        ])

        pipeline.fit(X, y, verbose=0)

        y_pred_withpipeline = pipeline.predict(X)
        print(y_pred_withpipeline.head())
        assert y_pred_withpipeline.shape == (248, 3)

        metrics, scores = pipeline.test(X, y, output_scores=True)
        print(metrics)
        assert scores.shape == (248, 3)
        assert metrics.shape == (1, 11)

        # back to X_xf1
        print(list(X_xf1.columns))
        l1 = list(sorted(set(_.split('.')[-1] for _ in X_xf1.columns)))
        levels = [['age', 'education', 'education_str', 'parity',
                   'pooled', 'spontaneous', 'stratum', 'induced'], [''] + l1]
        names = ['columns', 'slots']
        labels = [[], []]
        ages = []
        for _ in X_xf1.columns:
            spl = _.split('.')
            l1 = levels[0].index(spl[0])
            try:
                l2 = levels[1].index(spl[1])
            except IndexError:
                l2 = levels[1].index('')
            labels[0].append(l1)
            labels[1].append(l2)
            if spl[0] == 'age':
                ages.append(l2)
        X_xf1.columns = pandas.MultiIndex(
            levels=levels, labels=labels, names=names)
        print(X_xf1.head(n=2).T)

        col_ages = [('age', a) for a in ages]
        print(col_ages)
        try:
            model = AveragedPerceptronBinaryClassifier(feature=col_ages)
            model.fit(X_xf1, y, verbose=0)
            y_pred = model.predict(X_xf1)
            assert y_pred.shape == (248, 3)
        except Exception as e:
            # Does not work, probably confusion between list and tuple in nimbusml
            print(e)

        try:
            model = AveragedPerceptronBinaryClassifier(feature=['age'])
            model.fit(X_xf1, y, verbose=0)
            y_pred = model.predict(X_xf1)
            assert y_pred.shape == (248, 3)
        except Exception as e:
            # Does not work.
            print(e)