def test_naivebayes_breastcancer_cont(self):
    """Compare discomll naive Bayes with scikit-learn's GaussianNB.

    Both models are trained on the continuous breast cancer data and the
    predicted probabilities for the test set must agree within 1e-8.

    Run on its own with:
    python -m unittest tests_classification.Tests_Classification.test_naivebayes_breastcancer_cont
    """
    from sklearn.naive_bayes import GaussianNB
    from discomll.classification import naivebayes

    # The same data in two representations: arrays for scikit-learn and
    # discomll data objects for the distributed implementation.
    x_train, y_train, x_test, _y_test = datasets.breastcancer_cont(replication=1)
    train_data, test_data = datasets.breastcancer_cont_discomll(replication=1)

    # Reference probabilities from scikit-learn.
    reference = GaussianNB()
    fitted_reference = reference.fit(x_train, y_train)
    expected_probs = fitted_reference.predict_proba(x_test)

    # discomll: fit, predict, then collect the second element of every
    # result value (the per-sample probabilities).
    model_url = naivebayes.fit(train_data)
    result_url = naivebayes.predict(test_data, model_url)
    actual_probs = []
    for _key, value in result_iterator(result_url):
        actual_probs.append(value[1])

    probs_match = np.allclose(expected_probs, actual_probs, atol=1e-8)
    self.assertTrue(probs_match)
def test_naivebayes_breastcancer(self):
    """Compare discomll naive Bayes with Orange's NaiveLearner for m = 0, 1, 2.

    For every m the predicted targets must be equal and the predicted
    probabilities must be numerically close.

    Run on its own with:
    python -m unittest tests_classification.Tests_Classification.test_naivebayes_breastcancer
    """
    from discomll.classification import naivebayes

    orange_train, orange_test = datasets.breastcancer_disc_orange()
    discomll_train, discomll_test = datasets.breastcancer_disc_discomll()

    for m in range(3):
        # Reference side: Orange classifier asked for both the predicted
        # value and the class distribution of each test instance.
        orange_learner = Orange.classification.bayes.NaiveLearner(m=m)
        orange_classifier = orange_learner(orange_train)
        orange_results = []
        for instance in orange_test:
            orange_results.append(
                orange_classifier(instance, Orange.classification.Classifier.GetBoth)
            )
        expected_targets = [both[0].value for both in orange_results]
        expected_probs = [both[1].values() for both in orange_results]

        # discomll side: m is only passed to predict, not to fit.
        model_url = naivebayes.fit(discomll_train)
        predictions_url = naivebayes.predict(discomll_test, model_url, m=m)
        discomll_results = [value for _key, value in result_iterator(predictions_url)]
        actual_targets = [value[0] for value in discomll_results]
        actual_probs = [value[1] for value in discomll_results]

        self.assertListEqual(expected_targets, actual_targets)
        self.assertTrue(np.allclose(expected_probs, actual_probs))
# Example script: fit a discomll naive Bayes model on the lymphography
# training chunks and print what naivebayes.predict returns for the test
# chunks.
from discomll import dataset
from discomll.classification import naivebayes


def _lymphography_data(urls):
    """Return a dataset.Data over `urls` using the lymphography column layout.

    Train and test share one layout, so it is defined once here instead of
    being repeated per split. Every call builds fresh argument objects, so
    the two Data instances never share a list.
    """
    return dataset.Data(
        data_tag=urls,
        data_type="gzip",
        # NOTE(review): presumably expands the two URLs into the full range
        # of chunk names between them - confirm against dataset.Data.
        generate_urls=True,
        # Column 0 is the sample id, column 1 the target, columns 2..19 the
        # 18 features.
        X_indices=range(2, 20),
        id_index=0,
        y_index=1,
        # One entry per feature column, 18 in total (presumably "d" =
        # discrete and "c" = continuous - confirm against dataset.Data).
        X_meta=["d"] * 8 + ["c"] * 2 + ["d"] * 7 + ["c"],
        delimiter=",",
    )


train = _lymphography_data([
    "http://ropot.ijs.si/data/lymphography/train/xaaaaa.gz",
    "http://ropot.ijs.si/data/lymphography/train/xaaabj.gz",
])
test = _lymphography_data([
    "http://ropot.ijs.si/data/lymphography/test/xaaaaa.gz",
    "http://ropot.ijs.si/data/lymphography/test/xaaabj.gz",
])

fit_model = naivebayes.fit(train)
predictions = naivebayes.predict(test, fit_model)
# Parenthesised single-argument form prints the same on Python 2 and is
# also valid Python 3 syntax (the bare print statement is not).
print(predictions)
def naivebayes_fit(input_dict):
    """Fit a discomll naive Bayes model on ``input_dict["dataset"]``.

    The fit is run with ``save_results=True``. The value returned by
    ``naivebayes.fit`` is handed back in a dict under the key
    ``"fitmodel_url"``.
    """
    from discomll.classification import naivebayes as nb

    training_data = input_dict["dataset"]
    model_url = nb.fit(training_data, save_results=True)

    output_dict = {}
    output_dict["fitmodel_url"] = model_url
    return output_dict