Beispiel #1
0
def naivebayes_predict(input_dict):
    """Run discomll naive Bayes prediction and return the predictions URL.

    Args:
        input_dict: Mapping that must contain the keys ``"dataset"``,
            ``"fitmodel_url"`` and ``"m"``. When ``"m"`` is the empty
            string, the value 1 is used instead.

    Returns:
        A dict ``{"string": predictions_url}`` where ``predictions_url`` is
        whatever ``naivebayes.predict`` returned.
    """
    from discomll.classification import naivebayes

    # An empty "m" field is treated as "not supplied" and falls back to 1.
    # NOTE(review): presumably m is the m-estimate smoothing parameter and may
    # arrive as a string from the caller -- confirm whether it needs casting.
    m = 1 if input_dict["m"] == "" else input_dict["m"]

    predictions_url = naivebayes.predict(
        input_dict["dataset"],
        fitmodel_url=input_dict["fitmodel_url"],
        # Pass the resolved value; previously the raw field was forwarded,
        # which silently discarded the default computed above.
        m=m,
        save_results=True,
    )

    return {"string": predictions_url}
Beispiel #2
0
def naivebayes_predict(input_dict):
    """Predict with a fitted discomll naive Bayes model.

    Args:
        input_dict: Mapping providing ``"dataset"`` (data to predict on),
            ``"fitmodel_url"`` (passed through as ``fitmodel_url``) and
            ``"m"``. An empty-string ``"m"`` is replaced by 1.

    Returns:
        ``{"string": predictions_url}``, wrapping the value returned by
        ``naivebayes.predict``.
    """
    from discomll.classification import naivebayes

    # Resolve the default once so the call below never sees an empty string.
    raw_m = input_dict["m"]
    m = 1 if raw_m == "" else raw_m

    predictions_url = naivebayes.predict(
        input_dict["dataset"],
        fitmodel_url=input_dict["fitmodel_url"],
        m=m,  # resolved value (the original forwarded the raw field)
        save_results=True)

    return {"string": predictions_url}
    def test_naivebayes_breastcancer_cont(self):
        """Compare discomll naive Bayes output with scikit-learn's GaussianNB.

        Both models are trained on the continuous breast cancer data and the
        second element of every discomll result value is checked against
        ``GaussianNB.predict_proba`` with an absolute tolerance of 1e-8.

        Run with:
        python -m unittest tests_classification.Tests_Classification.test_naivebayes_breastcancer_cont
        """
        from sklearn.naive_bayes import GaussianNB
        from discomll.classification import naivebayes

        x_train, y_train, x_test, y_test = datasets.breastcancer_cont(replication=1)
        train_data, test_data = datasets.breastcancer_cont_discomll(replication=1)

        # Reference probabilities from scikit-learn.
        reference_model = GaussianNB()
        reference_model.fit(x_train, y_train)
        expected_probs = reference_model.predict_proba(x_test)

        # Probabilities produced by discomll on the same data.
        model_url = naivebayes.fit(train_data)
        results_url = naivebayes.predict(test_data, model_url)
        actual_probs = []
        for _, value in result_iterator(results_url):
            actual_probs.append(value[1])

        self.assertTrue(np.allclose(expected_probs, actual_probs, atol=1e-8))
Beispiel #4
0
    def test_naivebayes_breastcancer_cont(self):
        """Check discomll naive Bayes against GaussianNB on continuous data.

        The probabilities taken from index 1 of each discomll result value
        must match ``GaussianNB.predict_proba`` within ``atol=1e-8``.

        Run with:
        python -m unittest tests_classification.Tests_Classification.test_naivebayes_breastcancer_cont
        """
        from sklearn.naive_bayes import GaussianNB
        from discomll.classification import naivebayes

        x_train, y_train, x_test, y_test = datasets.breastcancer_cont(replication=1)
        train_data, test_data = datasets.breastcancer_cont_discomll(replication=1)

        # scikit-learn side: fit, then predict class probabilities.
        sklearn_model = GaussianNB().fit(x_train, y_train)
        sklearn_probs = sklearn_model.predict_proba(x_test)

        # discomll side: fit, predict, then collect the per-sample values.
        fitted_url = naivebayes.fit(train_data)
        predicted_url = naivebayes.predict(test_data, fitted_url)
        discomll_probs = [pair[1][1] for pair in result_iterator(predicted_url)]

        probs_match = np.allclose(sklearn_probs, discomll_probs, atol=1e-8)
        self.assertTrue(probs_match)
Beispiel #5
0
    def test_naivebayes_breastcancer(self):
        """Compare discomll naive Bayes with Orange's NaiveLearner for m = 0, 1, 2.

        For every ``m`` the predicted targets must be equal as lists and the
        predicted probabilities must agree under ``np.allclose``.

        Run with:
        python -m unittest tests_classification.Tests_Classification.test_naivebayes_breastcancer
        """
        from discomll.classification import naivebayes
        orange_train, orange_test = datasets.breastcancer_disc_orange()
        discomll_train, discomll_test = datasets.breastcancer_disc_discomll()

        for m in range(3):
            # Reference predictions from Orange (value and distribution).
            orange_learner = Orange.classification.bayes.NaiveLearner(m=m)
            orange_classifier = orange_learner(orange_train)
            orange_results = [
                orange_classifier(row, Orange.classification.Classifier.GetBoth)
                for row in orange_test
            ]
            expected_targets = [res[0].value for res in orange_results]
            expected_probs = [res[1].values() for res in orange_results]

            # discomll predictions for the same m.
            model_url = naivebayes.fit(discomll_train)
            results_url = naivebayes.predict(discomll_test, model_url, m=m)
            discomll_results = [value for _, value in result_iterator(results_url)]
            actual_targets = [value[0] for value in discomll_results]
            actual_probs = [value[1] for value in discomll_results]

            self.assertListEqual(expected_targets, actual_targets)
            self.assertTrue(np.allclose(expected_probs, actual_probs))
    def test_naivebayes_breastcancer(self):
        """Check discomll naive Bayes against Orange's NaiveLearner.

        Runs once for each ``m`` in ``range(3)``; the target lists must be
        equal and the probabilities must pass ``np.allclose``.

        Run with:
        python -m unittest tests_classification.Tests_Classification.test_naivebayes_breastcancer
        """
        from discomll.classification import naivebayes

        train_orange, test_orange = datasets.breastcancer_disc_orange()
        train_discomll, test_discomll = datasets.breastcancer_disc_discomll()

        for m in range(3):
            # Orange: train a classifier and ask for both value and distribution.
            classify = Orange.classification.bayes.NaiveLearner(m=m)(train_orange)
            reference = []
            for instance in test_orange:
                reference.append(classify(instance, Orange.classification.Classifier.GetBoth))
            reference_targets = [item[0].value for item in reference]
            reference_probs = [item[1].values() for item in reference]

            # discomll: fit, predict with the same m, then split each result value.
            fitted_url = naivebayes.fit(train_discomll)
            predicted_url = naivebayes.predict(test_discomll, fitted_url, m=m)
            discomll_targets, discomll_probs = [], []
            for _, value in result_iterator(predicted_url):
                discomll_targets.append(value[0])
                discomll_probs.append(value[1])

            self.assertListEqual(reference_targets, discomll_targets)
            self.assertTrue(np.allclose(reference_probs, discomll_probs))
from discomll import dataset
from discomll.classification import naivebayes

# Example: fit a discomll naive Bayes model on the lymphography training
# chunks and predict on the test chunks.
#
# Both datasets share the same layout: column 0 is the sample id, column 1 is
# the target, and columns 2..19 are the 18 features described by X_meta.
# NOTE(review): "d"/"c" in X_meta presumably mark discrete/continuous
# features -- confirm against the discomll dataset documentation.
train = dataset.Data(data_tag=["http://ropot.ijs.si/data/lymphography/train/xaaaaa.gz",
                               "http://ropot.ijs.si/data/lymphography/train/xaaabj.gz"],
                     data_type="gzip",
                     generate_urls=True,
                     X_indices=range(2, 20),
                     id_index=0,
                     y_index=1,
                     X_meta=["d", "d", "d", "d", "d", "d", "d", "d", "c", "c", "d", "d", "d", "d", "d", "d", "d", "c"],
                     delimiter=",")

test = dataset.Data(data_tag=["http://ropot.ijs.si/data/lymphography/test/xaaaaa.gz",
                              "http://ropot.ijs.si/data/lymphography/test/xaaabj.gz"],
                    data_type="gzip",
                    generate_urls=True,
                    X_indices=range(2, 20),
                    id_index=0,
                    y_index=1,
                    X_meta=["d", "d", "d", "d", "d", "d", "d", "d", "c", "c", "d", "d", "d", "d", "d", "d", "d", "c"],
                    delimiter=",")

# Fit on the training data, then predict on the test data using that model.
fit_model = naivebayes.fit(train)
predictions = naivebayes.predict(test, fit_model)
# Function-call form prints the same thing on Python 2 for a single argument
# and is also valid syntax on Python 3 (the bare print statement is not).
print(predictions)