def test_GamBinaryClassifier(self):
    """Fit GamBinaryClassifier on 'infert'; require hold-out accuracy > 0.70."""
    # Seed NumPy's global RNG before splitting. train_test_split is called
    # without a random_state -- presumably this is what keeps the split
    # (and therefore the accuracy) reproducible; confirm if the test flakes.
    np.random.seed(0)

    df = get_dataset("infert").as_df()
    # Drop the ': ' sequence from every column name.
    df.columns = [i.replace(': ', '') for i in df.columns]
    # One-hot encode the 'education_str' column.
    df = (OneHotVectorizer() << 'education_str').fit_transform(df)

    # 'case' is the label; every other column is used as a feature.
    X_train, X_test, y_train, y_test = \
        train_test_split(df.loc[:, df.columns != 'case'], df['case'])

    model = GamBinaryClassifier().fit(X_train, y_train)
    scores = model.predict(X_test)

    # Compare against a plain list of predictions -- presumably so the
    # comparison is positional and not affected by differing indexes.
    acc = np.mean(y_test == list(scores))
    assert_greater(acc, 0.70, "accuracy should be greater than %s" % 0.70)
###############################################################################
# GamBinaryClassifier
import numpy as np
from nimbusml.datasets import get_dataset
from nimbusml.ensemble import GamBinaryClassifier
from nimbusml.feature_extraction.categorical import OneHotVectorizer
from sklearn.model_selection import train_test_split

# Build train and test data from the built-in 'infert' data set.
# First rows of the raw frame:
#    Unnamed: 0  education   age  parity  induced  case  spontaneous  stratum  \
# 0           1        0.0  26.0     6.0      1.0   1.0          2.0      1.0
# 1           2        0.0  42.0     1.0      1.0   1.0          0.0      2.0
#    pooled.stratum education_str
# 0             3.0        0-5yrs
# 1             1.0        0-5yrs
np.random.seed(0)
df = get_dataset("infert").as_df()

# Strip the ': ' sequence out of the column names
# (e.g. 'Unnamed: 0' becomes 'Unnamed0').
df.columns = [name.replace(': ', '') for name in df.columns]

# One-hot encode the categorical 'education_str' column.
encoder = OneHotVectorizer() << 'education_str'
df = encoder.fit_transform(df)

# 'case' is the label; all remaining columns are features.
features = df.loc[:, df.columns != 'case']
labels = df['case']
X_train, X_test, y_train, y_test = train_test_split(features, labels)

# Train the GAM classifier and predict on the held-out rows.
ftree = GamBinaryClassifier().fit(X_train, y_train)
scores = ftree.predict(X_test)

# Evaluate the model: fraction of held-out rows predicted correctly.
accuracy = np.mean(y_test == list(scores))
print('Accuracy:', accuracy)