def nb_iris():
    """Build H2O Naive Bayes models on the iris data for several Laplace values.

    Uploads ``smalldata/iris/iris_wheader.csv`` (resolved via
    ``tests.locate``), describes the resulting frame, and then, for every
    value in ``[0, 1, 0.25]``, trains a model with ``iris[0:4]`` as ``x`` and
    ``iris[4]`` as ``y`` and shows it. Nothing is asserted about the models;
    the function simply returns if no call raises.
    """
    # Single-argument print(...) prints the same text under Python 2 and is
    # valid syntax under Python 3, unlike the bare print statement.
    print("Importing iris_wheader.csv data...\n")
    iris = h2o.upload_file(tests.locate("smalldata/iris/iris_wheader.csv"))
    iris.describe()

    laplace_range = [0, 1, 0.25]
    for laplace in laplace_range:
        print("H2O Naive Bayes with Laplace smoothing = {0}".format(laplace))
        iris_nbayes = h2o.naive_bayes(x=iris[0:4], y=iris[4], laplace=laplace)
        iris_nbayes.show()
def nb_iris(ip, port):
    """Build H2O Naive Bayes models on the iris data for several Laplace values.

    Uploads ``smalldata/iris/iris_wheader.csv`` (resolved via ``h2o.locate``),
    describes the resulting frame, and then, for every value in
    ``[0, 1, 0.25]``, trains a model with ``iris[0:4]`` as ``x`` and
    ``iris[4]`` as ``y`` and shows it. Nothing is asserted about the models.

    Args:
        ip: Accepted for the test-runner signature; not used in this function.
        port: Accepted for the test-runner signature; not used in this
            function.
    """
    # NOTE(review): ip/port are never passed to h2o here - presumably the
    # caller has already connected to the cluster; confirm.
    # Single-argument print(...) prints the same text under Python 2 and is
    # valid syntax under Python 3, unlike the bare print statement.
    print("Importing iris_wheader.csv data...\n")
    iris = h2o.upload_file(h2o.locate("smalldata/iris/iris_wheader.csv"))
    iris.describe()

    laplace_range = [0, 1, 0.25]
    for laplace in laplace_range:
        print("H2O Naive Bayes with Laplace smoothing = {0}".format(laplace))
        iris_nbayes = h2o.naive_bayes(x=iris[0:4], y=iris[4], laplace=laplace)
        iris_nbayes.show()
def nb_init_err():
    """Check that H2O Naive Bayes rejects invalid training configurations.

    Uploads ``smalldata/iris/iris_wheader.csv`` and calls ``h2o.naive_bayes``
    three times with arguments that are expected to be rejected: a negative
    ``laplace``, ``min_sdev=0``, and ``iris[3]`` as the response.

    Raises:
        AssertionError: If any of the three calls completes without raising.
    """
    print("Importing iris_wheader.csv data...\n")
    iris = h2o.upload_file(
        pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    # The bare attribute access ``iris.describe`` was a no-op; call it.
    iris.describe()

    # In each case only the model build lives inside ``try``. The failure
    # assertion is raised from ``else`` so that it cannot be swallowed by the
    # handler that is meant to absorb the expected training error.
    print("Laplace smoothing parameter is negative")
    try:
        h2o.naive_bayes(x=iris[0:4], y=iris[4], laplace=-1)
    except Exception:
        pass
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on negative laplace training parameter")

    print("Minimum standard deviation is zero")
    try:
        h2o.naive_bayes(x=iris[0:4], y=iris[4], min_sdev=0)
    except Exception:
        pass
    else:
        raise AssertionError("Expected naive bayes algo to fail on min_sdev = 0")

    print("Response column is not categorical")
    # NOTE(review): this call also passes min_sdev=0, so a failure here may be
    # triggered by min_sdev rather than by the response column - confirm
    # whether min_sdev should be dropped from this case.
    try:
        h2o.naive_bayes(x=iris[0:3], y=iris[3], min_sdev=0)
    except Exception:
        pass
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on response not categorical")
def nb_init_err():
    """Check that H2O Naive Bayes rejects invalid training configurations.

    Uploads ``smalldata/iris/iris_wheader.csv`` and calls ``h2o.naive_bayes``
    three times with arguments that are expected to be rejected: a negative
    ``laplace``, ``min_sdev=0``, and ``iris[3]`` as the response.

    Raises:
        AssertionError: If any of the three calls completes without raising.
    """
    print("Importing iris_wheader.csv data...\n")
    iris = h2o.upload_file(pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    # The bare attribute access ``iris.describe`` was a no-op; call it.
    iris.describe()

    # In each case only the model build lives inside ``try``. The failure
    # assertion is raised from ``else`` so that it cannot be swallowed by the
    # handler that is meant to absorb the expected training error.
    print("Laplace smoothing parameter is negative")
    try:
        h2o.naive_bayes(x=iris[0:4], y=iris[4], laplace=-1)
    except Exception:
        pass
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on negative laplace training parameter")

    print("Minimum standard deviation is zero")
    try:
        h2o.naive_bayes(x=iris[0:4], y=iris[4], min_sdev=0)
    except Exception:
        pass
    else:
        raise AssertionError("Expected naive bayes algo to fail on min_sdev = 0")

    print("Response column is not categorical")
    # NOTE(review): this call also passes min_sdev=0, so a failure here may be
    # triggered by min_sdev rather than by the response column - confirm
    # whether min_sdev should be dropped from this case.
    try:
        h2o.naive_bayes(x=iris[0:3], y=iris[3], min_sdev=0)
    except Exception:
        pass
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on response not categorical")
def nb_prostate():
    """Train H2O Naive Bayes on the prostate data and predict on the same frame.

    Uploads ``smalldata/logreg/prostate.csv`` (resolved via ``tests.locate``),
    converts the CAPSULE, RACE, DCAPS and DPROS columns to factors, builds a
    model with ``prostate[2:9]`` as ``x`` and ``prostate[1]`` as ``y``
    (``laplace=0``), shows it, then predicts on the training frame and prints
    the head of the predictions. Nothing is asserted about the results.
    """
    # Single-argument print(...) prints the same text under Python 2 and is
    # valid syntax under Python 3, unlike the bare print statement.
    print("Importing prostate.csv data...")
    prostate = h2o.upload_file(tests.locate("smalldata/logreg/prostate.csv"))

    print("Converting CAPSULE, RACE, DCAPS, and DPROS to categorical")
    prostate['CAPSULE'] = prostate['CAPSULE'].asfactor()
    # RACE must be derived from its own values; sourcing it from CAPSULE
    # would turn RACE into a copy of the CAPSULE column.
    prostate['RACE'] = prostate['RACE'].asfactor()
    prostate['DCAPS'] = prostate['DCAPS'].asfactor()
    prostate['DPROS'] = prostate['DPROS'].asfactor()

    print("Compare with Naive Bayes when x = 3:9, y = 2")
    prostate_nb = h2o.naive_bayes(x=prostate[2:9], y=prostate[1], laplace=0)
    prostate_nb.show()

    print("Predict on training data")
    prostate_pred = prostate_nb.predict(prostate)
    prostate_pred.head()
def nb_prostate(ip, port):
    """Train H2O Naive Bayes on the prostate data and predict on the same frame.

    Calls ``h2o.init(ip, port)``, uploads ``smalldata/logreg/prostate.csv``
    (resolved via ``h2o.locate``), converts the CAPSULE, RACE, DCAPS and DPROS
    columns to factors, builds a model with ``prostate[2:9]`` as ``x`` and
    ``prostate[1]`` as ``y`` (``laplace=0``), shows it, then predicts on the
    training frame and prints the head of the predictions. Nothing is asserted
    about the results.

    Args:
        ip: Passed as the first argument to ``h2o.init``.
        port: Passed as the second argument to ``h2o.init``.
    """
    h2o.init(ip, port)

    # Single-argument print(...) prints the same text under Python 2 and is
    # valid syntax under Python 3, unlike the bare print statement.
    print("Importing prostate.csv data...")
    prostate = h2o.upload_file(h2o.locate("smalldata/logreg/prostate.csv"))

    print("Converting CAPSULE, RACE, DCAPS, and DPROS to categorical")
    prostate["CAPSULE"] = prostate["CAPSULE"].asfactor()
    # RACE must be derived from its own values; sourcing it from CAPSULE
    # would turn RACE into a copy of the CAPSULE column.
    prostate["RACE"] = prostate["RACE"].asfactor()
    prostate["DCAPS"] = prostate["DCAPS"].asfactor()
    prostate["DPROS"] = prostate["DPROS"].asfactor()

    print("Compare with Naive Bayes when x = 3:9, y = 2")
    prostate_nb = h2o.naive_bayes(x=prostate[2:9], y=prostate[1], laplace=0)
    prostate_nb.show()

    print("Predict on training data")
    prostate_pred = prostate_nb.predict(prostate)
    prostate_pred.head()
def nb_prostate():
    """Train H2O Naive Bayes on the prostate data and predict on the same frame.

    Uploads ``smalldata/logreg/prostate.csv`` (resolved via
    ``pyunit_utils.locate``), converts the CAPSULE, RACE, DCAPS and DPROS
    columns to factors, builds a model with ``prostate[2:9]`` as ``x`` and
    ``prostate[1]`` as ``y`` (``laplace=0``), shows it, then predicts on the
    training frame and prints the head of the predictions. Nothing is asserted
    about the results.
    """
    print("Importing prostate.csv data...")
    prostate = h2o.upload_file(pyunit_utils.locate("smalldata/logreg/prostate.csv"))

    print("Converting CAPSULE, RACE, DCAPS, and DPROS to categorical")
    prostate['CAPSULE'] = prostate['CAPSULE'].asfactor()
    # RACE must be derived from its own values; sourcing it from CAPSULE
    # would turn RACE into a copy of the CAPSULE column.
    prostate['RACE'] = prostate['RACE'].asfactor()
    prostate['DCAPS'] = prostate['DCAPS'].asfactor()
    prostate['DPROS'] = prostate['DPROS'].asfactor()

    print("Compare with Naive Bayes when x = 3:9, y = 2")
    prostate_nb = h2o.naive_bayes(x=prostate[2:9], y=prostate[1], laplace=0)
    prostate_nb.show()

    print("Predict on training data")
    prostate_pred = prostate_nb.predict(prostate)
    prostate_pred.head()
def nb_baddata():
    """Exercise H2O Naive Bayes on degenerate training data.

    Builds frames from nested Python lists (10 inner lists of 100 values
    each) and checks four situations:

    1. every value is the string "NA"          -> training is expected to fail
    2. the response ``train_h2o[0]`` is numeric -> training is expected to fail
    3. the response is the constant "A"         -> training is expected to fail
    4. one predictor is the constant 5          -> training succeeds and the
       model's ``pcond`` output has 8 entries

    Raises:
        AssertionError: If an expected failure does not occur, or if the
            final model does not report exactly 8 ``pcond`` entries.
    """
    rawdata = [[random.gauss(0, 1) for _ in range(100)] for _ in range(10)]

    # In the three negative cases only the model build lives inside ``try``.
    # The failure assertion is raised from ``else`` so that it cannot be
    # swallowed by the handler meant to absorb the expected training error.
    print("Training data with all NA's")
    train = [["NA" for _ in range(100)] for _ in range(10)]
    train_h2o = h2o.H2OFrame(python_obj=train)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data of all NA's")

    # Response column must be categorical
    print("Training data with a numeric response column")
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data with a numeric response column")

    # Constant response dropped before model building
    print("Training data with a constant response: drop and throw error")
    rawdata[0] = 100 * ["A"]
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data with a constant response: drop and throw error")

    # Predictors with constant value automatically dropped
    print("Training data with 1 col of all 5's: drop automatically")
    rawdata = [[random.gauss(0, 1) for _ in range(100)] for _ in range(10)]
    rawdata[4] = 100 * [5]
    # string.ascii_letters exists on both Python 2 and 3; string.letters is
    # Python 2 only and locale-dependent.
    rawdata[0] = [random.choice(string.ascii_letters) for _ in range(100)]
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    model = h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])

    # 9 predictors are passed in and one of them is constant, hence 8.
    num_pcond = len(model._model_json["output"]["pcond"])
    assert num_pcond == 8, "Expected 8 predictors, but got {0}".format(num_pcond)
def nb_baddata():
    """Exercise H2O Naive Bayes on degenerate training data.

    Builds frames from nested Python lists (100 inner lists of 10 values
    each) and checks four situations:

    1. every value is the string "NA"          -> training is expected to fail
    2. the response ``train_h2o[0]`` is numeric -> training is expected to fail
    3. the response is the constant "A"         -> training is expected to fail
    4. one predictor is the constant 5          -> training succeeds and the
       model's ``pcond`` output has 8 entries

    Raises:
        AssertionError: If an expected failure does not occur, or if the
            final model does not report exactly 8 ``pcond`` entries.
    """
    rawdata = [[random.gauss(0, 1) for _ in range(10)] for _ in range(100)]

    # In the three negative cases only the model build lives inside ``try``.
    # The failure assertion is raised from ``else`` so that it cannot be
    # swallowed by the handler meant to absorb the expected training error.
    print("Training data with all NA's")
    train = [["NA" for _ in range(10)] for _ in range(100)]
    train_h2o = h2o.H2OFrame(python_obj=train)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data of all NA's")

    # Response column must be categorical
    print("Training data with a numeric response column")
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data with a numeric response column")

    # Constant response dropped before model building
    print("Training data with a constant response: drop and throw error")
    for row in rawdata:
        row[0] = "A"
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data with a constant response: drop and throw error")

    # Predictors with constant value automatically dropped
    print("Training data with 1 col of all 5's: drop automatically")
    rawdata = [[random.gauss(0, 1) for _ in range(10)] for _ in range(100)]
    for row in rawdata:
        row[4] = 5
        # string.ascii_letters exists on both Python 2 and 3; string.letters
        # is Python 2 only and locale-dependent.
        row[0] = random.choice(string.ascii_letters)
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    model = h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])

    # 9 predictors are passed in and one of them is constant, hence 8.
    num_pcond = len(model._model_json['output']['pcond'])
    assert num_pcond == 8, "Expected 8 predictors, but got {0}".format(num_pcond)