def test_nan_columns(self):
    """Columns that are entirely NaN must be dropped before fitting;
    the model is trained on the two remaining attributes and still
    achieves a reasonable classification accuracy."""
    data = Orange.data.Table("iris")
    # np.NaN was removed in NumPy 2.0 -- np.nan is the canonical spelling
    data.X[:, (1, 3)] = np.nan
    lr = LogisticRegressionLearner()
    cv = CrossValidation(k=2, store_models=True)
    res = cv(data, [lr])
    # only the two non-NaN attributes survive in the model's domain
    self.assertEqual(len(res.models[0][0].domain.attributes), 2)
    self.assertGreater(Orange.evaluation.CA(res)[0], 0.8)
def test_scores_log_reg_overfitted(self):
    """Training and testing on the same separable data must give
    perfect (all-ones) scores for logistic regression."""
    rows = list(zip(*self.scores_table_values + [list("yyyn")]))
    table = Table(self.scores_domain, rows)
    scores = self._test_scores(
        table, table, LogisticRegressionLearner(),
        OWTestLearners.TestOnTest, None)
    self.assertTupleEqual(scores, (1, 1, 1, 1, 1))
def test_coef_table_multiple(self):
    """Coefficient table of a multi-class model: one row per attribute
    plus the intercept, one column per class value."""
    zoo = Table("zoo")
    model = LogisticRegressionLearner()(zoo)
    table = create_coef_table(model)
    n_attrs = len(model.domain.attributes)
    n_classes = len(model.domain.class_var.values)
    # exactly one meta column (the coefficient names)
    self.assertEqual(1, len(stats(table.metas, None)))
    self.assertEqual(len(table), n_attrs + 1)  # attributes + intercept
    self.assertEqual(len(table[0]), n_classes)
def test_multinomial(self):
    """Cross-validated AUC on titanic lies within a sane (0.7, 0.9) range."""
    table = Table("titanic")
    lr = LogisticRegressionLearner()
    # the learner must be a thin sklearn wrapper
    assert isinstance(lr, Orange.classification.SklLearner)
    cv = CrossValidation(k=2)
    res = cv(table, [lr])
    # compute AUC once -- the original recomputed it for each bound
    auc = Orange.evaluation.AUC(res)[0]
    self.assertGreater(auc, 0.7)
    self.assertLess(auc, 0.9)
def test_coef_table_single(self):
    """Coefficient table of a binary model has a single value column."""
    titanic = Table("titanic")
    model = LogisticRegressionLearner()(titanic)
    table = create_coef_table(model)
    # one meta column, one row per attribute plus the intercept row,
    # and a single column of coefficients for the binary case
    self.assertEqual(1, len(stats(table.metas, None)))
    self.assertEqual(len(table), len(model.domain.attributes) + 1)
    self.assertEqual(len(table[0]), 1)
def test_unregister_prediction_model(self):
    """Re-sending a predictor unregisters the previous predictions
    model from the shared selection store."""
    log_reg_iris = LogisticRegressionLearner()(self.iris)
    self.send_signal(self.widget.Inputs.predictors, log_reg_iris)
    self.send_signal(self.widget.Inputs.data, self.iris)
    self.widget.selection_store.unregister = Mock()
    prev_model = self.widget.predictionsview.model()
    self.send_signal(self.widget.Inputs.predictors, log_reg_iris)
    # bug fix: `called_with` is not a Mock assertion -- it silently
    # created a child mock and verified nothing; `assert_called_with`
    # actually checks the call arguments
    self.widget.selection_store.unregister.assert_called_with(prev_model)
def test_send_coefficients(self):
    """
    Coefficients are only available if Logistic regression is used
    """
    w = self.widget

    def check_coefficients():
        # a polynomial expansion of degree d yields sum_{i<=d}(i + 1) terms
        num_coefficients = sum(i + 1 for i in range(w.degree + 1))
        self.assertEqual(
            len(self.get_output(w.Outputs.coefficients)), num_coefficients)

    def check_all_degrees():
        # check correctness before the degree is changed, then sweep it
        check_coefficients()
        for j in range(1, 6):
            w.degree_spin.setValue(j)
            check_coefficients()

    # none when no data (model not built)
    self.assertIsNone(self.get_output(w.Outputs.coefficients))
    # by default LogisticRegression, so coefficients exist
    self.send_signal(w.Inputs.data, self.iris)
    check_all_degrees()
    # change to a learner which does not have coefficients
    self.send_signal(w.Inputs.learner, TreeLearner())
    self.assertIsNone(self.get_output(w.Outputs.coefficients))
    # removing the learner falls back to the default LogisticRegression
    self.send_signal(w.Inputs.learner, None)
    check_all_degrees()
    # manually set LogisticRegression
    self.send_signal(w.Inputs.learner, LogisticRegressionLearner())
    check_all_degrees()
def test_set_learner(self):
    """
    Test if learner is set correctly
    """
    w = self.widget

    def check_default_learner():
        # with no input learner the widget falls back to its LEARNER
        self.assertIsNone(w.learner_other)
        self.assertIsInstance(w.learner, LogisticRegressionLearner)
        self.assertIsInstance(w.learner, w.LEARNER)
        self.assertEqual(type(self.get_output("Learner")),
                         type(LogisticRegressionLearner()))

    def check_learner_set(learner):
        # the input learner is stored and forwarded to the output
        self.assertEqual(w.learner_other, learner)
        self.assertEqual(type(w.learner), type(learner))
        self.assertEqual(type(self.get_output("Learner")), type(learner))

    # check if empty
    check_default_learner()
    learner = TreeLearner()
    self.send_signal("Learner", learner)
    check_learner_set(learner)
    # after learner is removed there should be LEARNER used
    self.send_signal("Learner", None)
    check_default_learner()
    # set it again just in case something goes wrong
    learner = RandomForestLearner()
    self.send_signal("Learner", learner)
    check_learner_set(learner)
    # change learner this time not from None
    learner = TreeLearner()
    self.send_signal("Learner", learner)
    check_learner_set(learner)
def test_precision_iris(self):
    """Precision on iris (evaluated on the training data) exceeds
    per-class and averaged thresholds."""
    learner = LogisticRegressionLearner(preprocessors=[])
    res = TestOnTrainingData()(self.iris, [learner])
    score = self.score
    self.assertGreater(score(res, average='weighted')[0], 0.95)
    self.assertGreater(score(res, target=1)[0], 0.95)
    self.assertGreater(score(res, target=1, average=None)[0], 0.95)
    self.assertGreater(score(res, target=1, average='weighted')[0], 0.95)
    self.assertGreater(score(res, target=0, average=None)[0], 0.99)
    self.assertGreater(score(res, target=2, average=None)[0], 0.94)
def test_recall_iris(self):
    """Recall on iris (evaluated on the training data) matches the
    expected per-class and averaged values."""
    learner = LogisticRegressionLearner(preprocessors=[])
    # bug fix: TestOnTrainingData must be instantiated and then called
    # with (data, learners) -- consistent with test_precision_iris;
    # passing data to the constructor is the removed legacy API
    res = TestOnTrainingData()(self.iris, [learner])
    self.assertAlmostEqual(self.score(res, average="weighted")[0], 0.96, 5)
    self.assertAlmostEqual(self.score(res, target=1)[0], 0.9, 5)
    self.assertAlmostEqual(self.score(res, target=1, average=None)[0],
                           0.9, 5)
    self.assertAlmostEqual(self.score(res, target=1, average="weighted")[0],
                           0.9, 5)
    self.assertAlmostEqual(self.score(res, target=0, average=None)[0], 1, 5)
    self.assertAlmostEqual(self.score(res, target=2, average=None)[0],
                           0.98, 5)
def test_scores_cross_validation(self):
    """
    Test more than two classes and cross-validation
    """
    scores = self._test_scores(
        Table("iris")[::15], None, LogisticRegressionLearner(),
        OWTestAndScore.KFold, 0)
    minima = (0.8, 0.5, 0.5, 0.5, 0.5)
    self.assertTrue(all(s >= m for s, m in zip(scores, minima)))
def results_for_preview(data_name=""):
    """Build cross-validation results on a demo data set, with learner
    names attached, for previewing evaluation widgets."""
    from Orange.data import Table
    from Orange.evaluation import CrossValidation
    from Orange.classification import \
        LogisticRegressionLearner, SVMLearner, NuSVMLearner

    data = Table(data_name or "ionosphere")
    learners = [
        LogisticRegressionLearner(penalty="l2"),
        LogisticRegressionLearner(penalty="l1"),
        SVMLearner(probability=True),
        NuSVMLearner(probability=True),
    ]
    results = CrossValidation(data, learners, store_data=True)
    results.learner_names = ["LR l2", "LR l1", "SVM", "Nu SVM"]
    return results
def test_errors(self):
    """Invalid `average` arguments raise ValueError."""
    learner = LogisticRegressionLearner(preprocessors=[])
    res = TestOnTrainingData()(self.iris, [learner])
    # binary average does not work for number of classes different than 2
    with self.assertRaises(ValueError):
        self.score(res, average="binary")
    # implemented only weighted and binary averaging
    with self.assertRaises(ValueError):
        self.score(res, average="abc")
def test_single_class(self):
    """Fitting on single-class data warns and yields a model that
    always predicts that class with probability one."""
    subset = self.iris[60:90]
    self.assertEqual(len(np.unique(subset.Y)), 1)
    with self.assertWarns(UserWarning):
        model = LogisticRegressionLearner()(subset)
    self.assertEqual(model(subset[0]), 1)
    self.assertTrue(
        np.all(model(subset[0], ret=Model.Probs) == [0, 1, 0]))
    self.assertTrue(np.all(model(subset) == 1))
def test_nc_type(self):
    """Classification conformal predictors accept a classification
    nonconformity score and reject a regression one."""
    nc_class = InverseProbability(LogisticRegressionLearner())
    nc_regr = AbsError(LinearRegressionLearner())
    # same construct/reject order as before, expressed as one loop
    for predictor, extra_args in ((TransductiveClassifier, ()),
                                  (InductiveClassifier, ()),
                                  (CrossClassifier, (5,))):
        predictor(nc_class, *extra_args)
        self.assertRaises(
            AssertionError, predictor, nc_regr, *extra_args)
def test_summary(self):
    """Check if the status bar updates when data is received"""
    data = self.iris
    info = self.widget.info
    no_input, no_output = "No data on input", "No data on output"
    predictor1 = ConstantLearner()(self.iris)
    predictor2 = LogisticRegressionLearner()(self.iris)

    # a predictor without data: input summary reports the failed model
    self.send_signal(self.widget.Inputs.predictors, predictor1)
    details = f"Data:<br>{no_input}.<hr>" + \
        "Model: 1 model (1 failed)<ul><li>constant</li></ul>"
    self.assertEqual(info._StateInfo__input_summary.brief, "0")
    self.assertEqual(info._StateInfo__input_summary.details, details)
    self.assertEqual(info._StateInfo__output_summary.brief, "")
    self.assertEqual(info._StateInfo__output_summary.details, no_output)

    # adding data: one working model, predictions on output
    self.send_signal(self.widget.Inputs.data, data)
    details = "Data:<br>" + \
        format_summary_details(data).replace('\n', '<br>') + \
        "<hr>Model: 1 model<ul><li>constant</li></ul>"
    self.assertEqual(info._StateInfo__input_summary.brief, "150")
    self.assertEqual(info._StateInfo__input_summary.details, details)
    output = self.get_output(self.widget.Outputs.predictions)
    summary, details = f"{len(output)}", format_summary_details(output)
    self.assertEqual(info._StateInfo__output_summary.brief, summary)
    self.assertEqual(info._StateInfo__output_summary.details, details)

    # a second predictor on another channel: two models listed
    self.send_signal(self.widget.Inputs.predictors, predictor2, 1)
    details = "Data:<br>" + \
        format_summary_details(data).replace('\n', '<br>') + \
        "<hr>Model: 2 models<ul><li>constant</li>" + \
        "<li>logistic regression</li></ul>"
    self.assertEqual(info._StateInfo__input_summary.brief, "150")
    self.assertEqual(info._StateInfo__input_summary.details, details)
    output = self.get_output(self.widget.Outputs.predictions)
    summary, details = f"{len(output)}", format_summary_details(output)
    self.assertEqual(info._StateInfo__output_summary.brief, summary)
    self.assertEqual(info._StateInfo__output_summary.details, details)

    # removing both predictors: data remains, no model on input
    self.send_signal(self.widget.Inputs.predictors, None)
    self.send_signal(self.widget.Inputs.predictors, None, 1)
    # bug fix: this statement was broken in two by a bad line split
    details = "Data:<br>" + \
        format_summary_details(data).replace('\n', '<br>') + \
        "<hr>Model:<br>No model on input."
    self.assertEqual(info._StateInfo__input_summary.brief, "150")
    self.assertEqual(info._StateInfo__input_summary.details, details)
    output = self.get_output(self.widget.Outputs.predictions)
    summary, details = f"{len(output)}", format_summary_details(output)
    self.assertEqual(info._StateInfo__output_summary.brief, summary)
    self.assertEqual(info._StateInfo__output_summary.details, details)

    # removing the data resets both summaries
    self.send_signal(self.widget.Inputs.data, None)
    self.assertEqual(info._StateInfo__input_summary.brief, "")
    self.assertEqual(info._StateInfo__input_summary.details, no_input)
    self.assertEqual(info._StateInfo__output_summary.brief, "")
    self.assertEqual(info._StateInfo__output_summary.details, no_output)
def test_nonexchangeability(self):
    """With a tiny, unrepresentative calibration set the empirical
    error rate exceeds the nominal significance level."""
    tab = Table(os.path.join(os.path.dirname(__file__),
                             '../data/usps.tab'))
    train, test = split_data(tab, 7291, 2007)
    test = test[:200]
    # keep only a 3:1 split -- deliberately too small to calibrate well
    train, calibrate = split_data(train, 3, 1)
    icp = InductiveClassifier(
        InverseProbability(LogisticRegressionLearner()),
        train, calibrate)
    errors = sum(inst.get_class() not in icp(inst.x, 0.1)
                 for inst in test)
    self.assertGreater(errors / len(test), 0.13)
def test_auto_solver(self):
    """`solver="auto"` must resolve to sklearn's default solver for the
    chosen penalty."""
    # These defaults are valid as of sklearn v0.23.0
    # lbfgs is default for l2 penalty
    lr = LogisticRegressionLearner(penalty="l2", solver="auto")
    skl_clf = lr._initialize_wrapped()
    self.assertEqual(skl_clf.solver, "lbfgs")
    self.assertEqual(skl_clf.penalty, "l2")
    # lbfgs is default for no penalty
    lr = LogisticRegressionLearner(penalty=None, solver="auto")
    skl_clf = lr._initialize_wrapped()
    self.assertEqual(skl_clf.solver, "lbfgs")
    self.assertEqual(skl_clf.penalty, None)
    # liblinear is default for l1 penalty (original comment wrongly said l2)
    lr = LogisticRegressionLearner(penalty="l1", solver="auto")
    skl_clf = lr._initialize_wrapped()
    self.assertEqual(skl_clf.solver, "liblinear")
    self.assertEqual(skl_clf.penalty, "l1")
def test_report_widgets_evaluate(self):
    """Evaluation widgets can build a report after receiving a learner,
    training data and test data."""
    rep = OWReport.get_instance()
    data = Table("zoo")
    widgets = self.eval_widgets
    results = CrossValidation(data, [LogisticRegressionLearner()],
                              store_data=True)
    results.learner_names = ["LR l2"]
    w = self.create_widget(OWTestLearners)
    # resolve the widget's input handlers by name and feed the inputs
    set_learner = getattr(w, w.Inputs.learner.handler)
    set_train = getattr(w, w.Inputs.train_data.handler)
    set_test = getattr(w, w.Inputs.test_data.handler)
    set_learner(LogisticRegressionLearner(), 0)
    set_train(data)
    set_test(data)
    # rendering the report must not raise
    w.create_report_html()
    rep.make_report(w)
    self._create_report(widgets, rep, results)
def test_scores_log_reg_bad2(self):
    """Training on labels opposite to the test labels yields all-zero
    scores."""
    def make_table(labels):
        # attach the given class labels to the shared feature values
        return Table.from_list(
            self.scores_domain,
            list(zip(*(self.scores_table_values + [list(labels)]))))

    table_train = make_table("nnyy")
    table_test = make_table("yynn")
    scores = self._test_scores(
        table_train, table_test, LogisticRegressionLearner(),
        OWTestLearners.TestOnTest, None)
    self.assertTupleEqual(scores, (0, 0, 0, 0, 0))
def test_np_data(self):
    """
    Test ThresholdModel with numpy data. Numpy input must already be
    transformed to the model's domain, since the model does not know
    how to do that itself.
    """
    data = Table('heart_disease')
    model = ThresholdLearner(LogisticRegressionLearner())(data)
    predictions = model(model.data_to_model_domain(data).X)
    self.assertTupleEqual((len(data), ), predictions.shape)
def test_loo(self):
    """Leave-one-out conformal prediction on single held-out instances
    for both classification and regression."""
    train, test = get_instance(Table('iris'), 0)
    loocp = LOOClassifier(
        InverseProbability(LogisticRegressionLearner()), train)
    self.assertEqual(loocp(test.x, 0.1), ['Iris-setosa'])
    train, test = get_instance(Table('housing'), 0)
    loocr = LOORegressor(AbsError(LinearRegressionLearner()), train)
    lo, hi = loocr(test.x, 0.1)
    # the prediction interval should be reasonably tight
    self.assertLess(hi - lo, 20)
def test_constant_feature_cont(self):
    """Check nomogram for data with constant continuous feature"""
    domain = Domain(
        [DiscreteVariable("d", ("a", "b")), ContinuousVariable("c")],
        DiscreteVariable("cls", ("c", "d")))
    X = np.array([[0, 0], [1, 0], [0, 0], [1, 0]])
    y = np.array([0, 1, 1, 0])
    data = Table(domain, X, y)
    # both classifiers should produce a 50/50 nomogram
    for learner in (NaiveBayesLearner(), LogisticRegressionLearner()):
        self._test_helper(learner(data), [50, 50])
def test_nomogram_with_instance_lr(self):
    """Check initialized marker values and feature sorting for logistic
    regression classifier and data on input"""
    classifier = LogisticRegressionLearner()(self.titanic)
    instance = self.titanic[10:11]
    self.send_signal("Classifier", classifier)
    self.send_signal("Data", instance)
    self._check_values(instance.domain.attributes, instance)
    expected_orders = [["status", "age", "sex"],
                       ["age", "sex", "status"],
                       ["sex", "status", "age"],
                       ["sex", "status", "age"],
                       ["sex", "status", "age"]]
    self._test_sort(expected_orders)
def test_select_data_first(self):
    """Selecting a row in the (sorted) predictions view mirrors that
    selection in the data view."""
    predictor = LogisticRegressionLearner()(self.iris)
    self.send_signal(self.widget.Inputs.data, self.iris)
    self.send_signal(self.widget.Inputs.predictors, predictor)
    self.widget.predictionsview.model().sort(0)
    self.widget.predictionsview.selectRow(1)
    selection_model = self.widget.dataview.selectionModel()
    selected = {(idx.row(), idx.column())
                for idx in selection_model.selectedIndexes()}
    # the whole corresponding data row (all 5 columns) is selected
    self.assertEqual(selected, {(1, col) for col in range(5)})
def test_run(self):
    """`run` repeats sampling `rep` times and gathers one prediction
    per test instance in every repetition."""
    iris = Table('iris')
    cp = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)
    res = run(cp, 0.1, RandomSampler(iris, 4, 1), rep=3)
    # a 4:1 random split tests one fifth of the data per repetition
    self.assertEqual(len(res.preds), 3 * 1 / 5 * len(iris))
    housing = Table('housing')
    cr = InductiveRegressor(AbsError(LinearRegressionLearner()))
    res = run(cr, 0.1, CrossSampler(housing, 4), rep=3)
    # cross sampling covers every instance in each repetition
    self.assertEqual(len(res.preds), 3 * len(housing))
def test_set_learner_empty(self):
    """
    Test if learner is set correctly when no learner provided
    """
    w = self.widget
    # idiom: assertIsNone / assertIsInstance give clearer failure
    # messages than assertEqual(..., None) / assertTrue(isinstance(...))
    self.assertIsNone(w.learner_other)
    self.assertIsInstance(w.learner, LogisticRegressionLearner)
    self.assertIsInstance(w.learner, w.LEARNER)
    self.assertEqual(type(self.get_output(w.Outputs.learner)),
                     type(LogisticRegressionLearner()))
def test_LogisticRegressionNormalization(self):
    """Normalization should improve accuracy when one feature is on a
    vastly larger scale, and the fitted coefficients must be scaled
    back so predictions on the unnormalized data match."""
    np.random.seed(42)
    # prepend a random feature with values up to 1e6 to iris
    new_attrs = (ContinuousVariable('c0'), ) + self.iris.domain.attributes
    new_domain = Domain(new_attrs,
                        self.iris.domain.class_vars,
                        self.iris.domain.metas)
    new_table = np.hstack(
        (1000000 * np.random.random((self.iris.X.shape[0], 1)),
         self.iris))
    table = self.iris.from_numpy(new_domain, new_table)
    lr = LogisticRegressionLearner(normalize=False)
    lr_norm = LogisticRegressionLearner(normalize=True)

    # check that normalization produces better results
    results = CrossValidation(table, [lr_norm, lr], k=3)
    ca = CA(results)
    self.assertGreater(ca[0], ca[1])

    # check that coefficients are properly scaled back to unnormalized data
    model = lr_norm(table)
    y = np.argmax(np.dot(table.X, model.coefficients.T) + model.intercept,
                  axis=1)
    np.testing.assert_array_equal(model(table), y)
def test_output_model(self):
    """Check if model is on output after sending data and apply"""
    # nothing on output before any input arrives
    self.assertIsNone(self.get_output(self.widget.Outputs.model))
    # a learner alone (no data) must not produce a model
    self.send_signal("Learners", LogisticRegressionLearner(), 0)
    self.widget.apply_button.button.click()
    self.assertIsNone(self.get_output(self.widget.Outputs.model))
    # once data is present, applying trains and outputs a model
    self.send_signal('Data', self.data)
    self.widget.apply_button.button.click()
    self.wait_until_stop_blocking()
    model = self.get_output(self.widget.Outputs.model)
    self.assertIsNotNone(model)
    # the model type is whatever the widget's LEARNER declares it returns
    self.assertIsInstance(model, self.widget.LEARNER.__returns__)
def test_validate_transductive(self):
    """Transductive conformal prediction is empirically valid: the
    fraction of covered instances approximates 1 - eps."""
    tab = Table('iris')
    eps = 0.1
    # renamed `all` -> `total` (the original shadowed the builtin) and
    # dropped the `num` accumulator, which was never read
    correct, total = 0, len(tab)
    for i in range(total):
        train, test = get_instance(tab, i)
        tcp = TransductiveClassifier(
            InverseProbability(LogisticRegressionLearner()), train)
        pred = tcp(test.x, eps)
        if test.get_class() in pred:
            correct += 1
    self.assertAlmostEqual(correct / total, 1.0 - eps, delta=0.01)