def setUpClass(cls):
    """Prepare the infert dataset once and store it on the class.

    Loads the "infert" dataset as a data frame, cleans the column names,
    runs the ``education_str`` column through ``OneHotVectorizer`` and
    stores the result of ``split_features_and_label`` (label name
    ``'case'``) as ``cls.X`` and ``cls.y``.
    """
    infert = get_dataset("infert").as_df()

    # Strip every ': ' sequence out of the column names.
    infert.columns = [name.replace(': ', '') for name in infert.columns]

    # The categorical column is expected to be string-typed before encoding.
    assert is_string_dtype(infert['education_str'].dtype)

    encoder = OneHotVectorizer() << ['education_str']
    encoded = encoder.fit_transform(infert)

    # After the transform the original column must no longer be present.
    assert 'education_str' not in encoded.columns

    cls.X, cls.y = split_features_and_label(encoded, 'case')
def get_iris():
    """Return the iris data split into features and label.

    The existing ``Label`` column is replaced by the values of the
    ``Species`` column (re-added under the name ``Label``), after which the
    ``Species`` and ``Setosa`` columns are removed.  The frame is then
    passed to ``split_features_and_label`` with ``'Label'`` as label name.

    Returns:
        The ``(X, y)`` pair produced by ``split_features_and_label``.
    """
    iris = get_dataset("iris").as_df()

    # Swap the original Label column for the Species values.
    iris.drop(['Label'], axis=1, inplace=True)
    iris['Label'] = iris['Species']

    # Species is now duplicated in Label; Setosa is dropped as well.
    iris.drop(['Species', 'Setosa'], axis=1, inplace=True)

    features, label = split_features_and_label(iris, 'Label')
    return features, label
def infert_df(label_name):
    """Return the infert data split into features and label.

    The ``education_str`` column is run through ``OneHotVectorizer`` before
    the frame is handed to ``split_features_and_label``.

    Args:
        label_name: Name forwarded to ``split_features_and_label`` as the
            label column.

    Returns:
        The ``(X, y)`` pair produced by ``split_features_and_label``.
    """
    raw = get_dataset('infert').as_df()
    vectorizer = OneHotVectorizer() << 'education_str'
    encoded = vectorizer.fit_transform(raw)
    features, label = split_features_and_label(encoded, label_name)
    return features, label
from nimbusml.decomposition import FactorizationMachineBinaryClassifier
from nimbusml.ensemble import LightGbmClassifier
from nimbusml.linear_model import FastLinearClassifier
from nimbusml.linear_model import LogisticRegressionBinaryClassifier
from nimbusml.linear_model import LogisticRegressionClassifier
from nimbusml.naive_bayes import NaiveBayesClassifier
from nimbusml.tests.test_utils import split_features_and_label
from sklearn.model_selection import train_test_split
from sklearn.utils.testing import assert_almost_equal, assert_equal

# Binary problem built from iris: a Label of 1 stays 1, anything else
# becomes 0.  The global NumPy RNG is seeded first, presumably so the
# split below is reproducible.
np.random.seed(0)
df = get_dataset("iris").as_df()
df.drop(['Species'], axis=1, inplace=True)
df.Label = [1 if x == 1 else 0 for x in df.Label]
features, labels = split_features_and_label(df, 'Label')
X_train, X_test, y_train, y_test = train_test_split(features, labels)

# Three-class problem built from iris, with the numeric labels 0/1/2
# replaced by the strings in _str_map.  The RNG is re-seeded before the
# dataset is loaded again.
np.random.seed(0)
df = get_dataset("iris").as_df()
df.drop(['Species'], axis=1, inplace=True)
_str_map = {0: 'Red', 1: 'Green', 2: 'Blue'}
df.Label = df.Label.apply(lambda code: _str_map[code])
features_3class, labels_3class = split_features_and_label(df, 'Label')
X_train_3class, X_test_3class, y_train_3class, y_test_3class = (
    train_test_split(features_3class, labels_3class)
)

# fit classifier, return sum of probabilities
from nimbusml.linear_model import (
    FastLinearBinaryClassifier,
    AveragedPerceptronBinaryClassifier,
)
from nimbusml.linear_model import (
    LogisticRegressionBinaryClassifier,
    SgdBinaryClassifier,
)
# from nimbusml.linear_model import SymSgdBinaryClassifier
from nimbusml.multiclass import OneVsRestClassifier
from nimbusml.tests.test_utils import split_features_and_label
from sklearn.model_selection import train_test_split
from sklearn.utils.testing import (
    assert_equal,
    assert_not_equal,
    assert_greater,
)

# Iris train/test split shared by the helpers below.  The global NumPy RNG
# is seeded first, presumably so the split is reproducible.
np.random.seed(0)
df = get_dataset("iris").as_df()
df.drop(['Species'], axis=1, inplace=True)
features, labels = split_features_and_label(df, 'Label')
X_train, X_test, y_train, y_test = train_test_split(features, labels)


def proba_average(ovr):
    """Fit ``ovr`` and summarise its predicted probabilities.

    The estimator is fitted on ``X_train``/``y_train``; the output of
    ``predict_proba(X_test)`` is summed along axis 1 and the mean of those
    sums is returned.
    """
    ovr.fit(X_train, y_train)
    probabilities = ovr.predict_proba(X_test)
    return probabilities.sum(axis=1).mean()


def decfun_average(ovr):
    """Fit ``ovr`` and summarise its decision values.

    The estimator is fitted on ``X_train``/``y_train``; the output of
    ``decision_function(X_test)`` is summed along axis 1 and the mean of
    those sums is returned.
    """
    ovr.fit(X_train, y_train)
    decision_values = ovr.decision_function(X_test)
    return decision_values.sum(axis=1).mean()
def setUpClass(cls):
    """Prepare the iris dataset once and store it on the class.

    Loads the "iris" dataset as a data frame, removes the ``Species``
    column and stores the result of ``split_features_and_label`` (label
    name ``'Label'``) as ``cls.X`` and ``cls.y``.
    """
    iris = get_dataset("iris").as_df()
    iris.drop(['Species'], axis=1, inplace=True)
    features, label = split_features_and_label(iris, 'Label')
    cls.X = features
    cls.y = label