def setUpClass(cls):
     """Prepare the infert features and label shared by the tests.

     Loads the "infert" dataset, strips the ``': '`` sequence out of the
     column names, encodes ``education_str`` with ``OneHotVectorizer`` and
     stores the resulting split on the class as ``cls.X`` / ``cls.y``,
     using ``'case'`` as the label column.

     NOTE(review): presumably decorated with ``@classmethod`` inside a
     test class that lies outside this view -- confirm.
     """
     frame = get_dataset("infert").as_df()
     # drop every ': ' sequence from the column names
     frame.columns = [name.replace(': ', '') for name in frame.columns]

     # the categorical column has to be string-typed before it is encoded
     assert is_string_dtype(frame['education_str'].dtype)
     encoder = OneHotVectorizer() << ['education_str']
     frame = encoder.fit_transform(frame)
     # the raw column must no longer be present once it has been encoded
     assert 'education_str' not in frame.columns

     features, target = split_features_and_label(frame, 'case')
     cls.X = features
     cls.y = target
Beispiel #2
0
def get_iris():
    """Return the iris features and label, with the species as the label.

    The dataset's own ``Label`` column is discarded and re-created from
    ``Species``; the ``Species`` and ``Setosa`` columns are then removed
    before the frame is split on ``'Label'``.

    Returns:
        The ``(X, y)`` pair produced by ``split_features_and_label``.
    """
    iris = get_dataset("iris").as_df()
    iris.drop(['Label'], axis=1, inplace=True)
    iris['Label'] = iris['Species']
    # both leftover columns are removed in a single call
    iris.drop(['Species', 'Setosa'], axis=1, inplace=True)
    features, target = split_features_and_label(iris, 'Label')
    return features, target
Beispiel #3
0
def infert_df(label_name):
    """Return the infert features and label with ``education_str`` encoded.

    Args:
        label_name: name of the column passed to
            ``split_features_and_label`` as the label.

    Returns:
        The ``(X, y)`` pair produced by ``split_features_and_label``.
    """
    raw = get_dataset('infert').as_df()
    # bind the vectorizer to the categorical column, then transform
    encoder = OneHotVectorizer() << 'education_str'
    encoded = encoder.fit_transform(raw)
    features, target = split_features_and_label(encoded, label_name)
    return features, target
Beispiel #4
0
from nimbusml.decomposition import FactorizationMachineBinaryClassifier
from nimbusml.ensemble import LightGbmClassifier
from nimbusml.linear_model import FastLinearClassifier
from nimbusml.linear_model import LogisticRegressionBinaryClassifier
from nimbusml.linear_model import LogisticRegressionClassifier
from nimbusml.naive_bayes import NaiveBayesClassifier
from nimbusml.tests.test_utils import split_features_and_label
from sklearn.model_selection import train_test_split
from sklearn.utils.testing import assert_almost_equal, assert_equal

# use iris dataset
# Module-level fixture: a binary-label version of iris, split into
# train and test parts that are stored in module globals.
# The seed is set first, presumably so that the train_test_split call
# below (which passes no random_state) is reproducible -- TODO confirm.
np.random.seed(0)
df = get_dataset("iris").as_df()
# 'Species' is removed so that it is not part of the features.
df.drop(['Species'], inplace=True, axis=1)
# Binarize the label: a value of 1 stays 1, every other value becomes 0.
df.Label = [1 if x == 1 else 0 for x in df.Label]
features, labels = split_features_and_label(df, 'Label')
X_train, X_test, y_train, y_test = \
    train_test_split(features, labels)

# 3 class dataset with string labels
# Module-level fixture: iris with its labels replaced by strings, split
# into train and test parts that are stored in the *_3class globals.
# The seed is set again, presumably so this split does not depend on how
# much randomness was consumed earlier -- TODO confirm.
np.random.seed(0)
df = get_dataset("iris").as_df()
df.drop(['Species'], inplace=True, axis=1)
# Map the label values 0, 1 and 2 to string class names; a label value
# missing from the map raises KeyError inside the lambda.
_str_map = {0: 'Red', 1: 'Green', 2: 'Blue'}
df.Label = df.Label.apply(lambda x: _str_map[x])
features_3class, labels_3class = split_features_and_label(df, 'Label')
X_train_3class, X_test_3class, y_train_3class, y_test_3class = \
    train_test_split(features_3class, labels_3class)


# fit classifier, return sum of probabilities
from nimbusml.linear_model import FastLinearBinaryClassifier, \
    AveragedPerceptronBinaryClassifier
from nimbusml.linear_model import LogisticRegressionBinaryClassifier, \
    SgdBinaryClassifier
# from nimbusml.linear_model import SymSgdBinaryClassifier
from nimbusml.multiclass import OneVsRestClassifier
from nimbusml.tests.test_utils import split_features_and_label
from sklearn.model_selection import train_test_split
from sklearn.utils.testing import assert_equal, assert_not_equal, \
    assert_greater

# use iris dataset
# Module-level fixture: iris with its original 'Label' values, split
# into the train and test parts read by proba_average and decfun_average.
# The seed is set first, presumably so that the train_test_split call
# below (which passes no random_state) is reproducible -- TODO confirm.
np.random.seed(0)
df = get_dataset("iris").as_df()
# 'Species' is removed so that it is not part of the features.
df.drop(['Species'], inplace=True, axis=1)
features, labels = split_features_and_label(df, 'Label')
X_train, X_test, y_train, y_test = \
    train_test_split(features, labels)


# fit classifier, return the mean over test rows of the summed probabilities
def proba_average(ovr):
    """Fit ``ovr`` and return the mean of its per-row probability sums.

    The estimator is fitted on the module-level ``X_train`` / ``y_train``
    and evaluated on the module-level ``X_test``.

    Args:
        ovr: object exposing ``fit(X, y)`` and ``predict_proba(X)``.

    Returns:
        The mean, over the rows of ``predict_proba(X_test)``, of each
        row's sum along axis 1.
    """
    ovr.fit(X_train, y_train)
    probabilities = ovr.predict_proba(X_test)
    row_totals = probabilities.sum(axis=1)
    return row_totals.mean()


# fit classifier, return the mean over test rows of the summed decision values
def decfun_average(ovr):
    """Fit ``ovr`` and return the mean of its per-row decision-value sums.

    The estimator is fitted on the module-level ``X_train`` / ``y_train``
    and evaluated on the module-level ``X_test``.

    Args:
        ovr: object exposing ``fit(X, y)`` and ``decision_function(X)``.

    Returns:
        The mean, over the rows of ``decision_function(X_test)``, of each
        row's sum along axis 1.
    """
    ovr.fit(X_train, y_train)
    decision_values = ovr.decision_function(X_test)
    row_totals = decision_values.sum(axis=1)
    return row_totals.mean()
 def setUpClass(cls):
     """Store the iris features and label on the class for the tests.

     Loads the "iris" dataset, removes the ``Species`` column and keeps
     the split on ``'Label'`` as ``cls.X`` / ``cls.y``.

     NOTE(review): presumably decorated with ``@classmethod`` inside a
     test class that lies outside this view -- confirm.
     """
     iris = get_dataset("iris").as_df()
     iris.drop(['Species'], axis=1, inplace=True)
     features, target = split_features_and_label(iris, 'Label')
     cls.X = features
     cls.y = target