def test_adaboost_base_estimator(self):
    """Boosted depth-1 stumps should score below boosted full trees on iris."""
    np.random.seed(0)
    weak = SklTreeLearner(max_depth=1)
    strong = SklTreeLearner()
    boosted_stump = SklAdaBoostLearner(base_estimator=weak)
    boosted_tree = SklAdaBoostLearner(base_estimator=strong)
    res = CrossValidation(self.iris, [boosted_stump, boosted_tree], k=4)
    scores = CA(res)
    self.assertLess(scores[0], scores[1])
def test_adaboost_base_estimator(self):
    """CV accuracy of boosted stumps must not exceed that of boosted trees."""
    np.random.seed(0)
    shallow = SklTreeLearner(max_depth=1)
    deep = SklTreeLearner()
    learners = [
        SklAdaBoostClassificationLearner(base_estimator=shallow,
                                         n_estimators=5),
        SklAdaBoostClassificationLearner(base_estimator=deep,
                                         n_estimators=5),
    ]
    cv = CrossValidation(k=4)
    results = cv(self.iris, learners)
    accuracies = CA(results)
    self.assertLessEqual(accuracies[0], accuracies[1])
def test_input_learner(self):
    """Check if base learner properly changes with learner on the input"""
    depth = 2
    # Without an input, the widget falls back to a default tree learner
    # with no max_depth set.
    default_est = self.widget.base_estimator
    self.assertIsInstance(default_est, SklTreeLearner)
    self.assertIsNone(default_est.params.get("max_depth"))
    # Send a configured learner and verify the widget adopts it.
    self.send_signal("Learner", SklTreeLearner(max_depth=depth))
    self.assertEqual(
        self.widget.base_estimator.params.get("max_depth"), depth)
    # The emitted learner must carry the same base estimator settings.
    self.widget.apply_button.button.click()
    emitted_est = self.get_output("Learner").params.get("base_estimator")
    self.assertEqual(emitted_est.max_depth, depth)
def test_tree(self):
    """2-fold CV AUC of a default tree on iris lies strictly in (0.8, 1)."""
    res = CrossValidation(self.iris, [SklTreeLearner()], k=2)
    auc = AUC(res)[0]
    self.assertGreater(auc, 0.8)
    self.assertLess(auc, 1.)
def test_classification(self):
    """A tree fitted and evaluated on the same data predicts every label."""
    data = Table('iris')
    model = SklTreeLearner()(data)
    predictions = model(data)
    self.assertTrue(np.all(data.Y.flatten() == predictions))
# Export the prepared table to CSV, then read it back for part (b).
with open(exportFilePath, "w") as output:
    datae1.to_csv(output, header=True, sep=",")
path1 = "C:\\Users\\acer\\Desktop\\friends\\export.csv"
datae = p.read_csv(path1)

# now for b part
from Orange.classification import SklTreeLearner

td = Table.from_file(path1)
# Treat every column after the first as a feature; column 7 is the class.
# NOTE(review): variables[1:] still contains variables[7] -- confirm Orange
# removes the class variable from the attributes when building the Domain.
feature_vars = list(td.domain.variables[1:])
class_label_var = td.domain.variables[7]
print(class_label_var)
md = Domain(feature_vars, class_label_var)
td = Table.from_table(domain=md, source=td)

# 80/20 train/test split. Compute the boundary from the data length instead
# of hard-coding 1360 (which only matched a dataset of exactly 1700 rows),
# so the split stays correct if the exported data changes size.
n1 = td.approx_len()
print(n1 * 80 / 100)
split = int(n1 * 80 / 100)
train_data_set = td[:split]
test_data_set = td[split:]

# Fit on the training part, predict the held-out part, and report
# 10-fold cross-validation scores on the full table.
tree_learner = SklTreeLearner()
decision_tree = tree_learner(train_data_set)
results = CrossValidation(td, [tree_learner], k=10)
print(decision_tree(test_data_set))
print("Accuracy", scoring.CA(results)[0])
print("AUC", scoring.AUC(results)[0])
class OWAdaBoostClassification(OWBaseLearner):
    """Widget for configuring an AdaBoost classification learner.

    An optional learner on the input replaces the default tree base
    estimator, provided it supports instance weights.
    """
    name = "AdaBoost"
    description = "An ensemble meta-algorithm that combines weak learners " \
                  "and adapts to the 'hardness' of each training sample. "
    icon = "icons/AdaBoost.svg"
    priority = 80

    LEARNER = SklAdaBoostLearner

    inputs = [("Learner", LearnerClassification, "set_base_learner")]

    # Boosting variants; the selected entry is passed as the `algorithm`
    # argument in create_learner().
    losses = ["SAMME", "SAMME.R"]

    # Persisted widget settings.
    n_estimators = Setting(50)
    learning_rate = Setting(1.)
    algorithm = Setting(0)

    DEFAULT_BASE_ESTIMATOR = SklTreeLearner()

    class Error(OWBaseLearner.Error):
        no_weight_support = Msg('The base learner does not support weights.')

    def add_main_layout(self):
        # Build the "Parameters" box: a label showing the current base
        # estimator plus spin boxes for estimator count and learning rate.
        box = gui.widgetBox(self.controlArea, "Parameters")
        self.base_estimator = self.DEFAULT_BASE_ESTIMATOR
        self.base_label = gui.label(
            box, self, "Base estimator: " + self.base_estimator.name)
        self.n_estimators_spin = gui.spin(
            box, self, "n_estimators", 1, 100, label="Number of estimators:",
            alignment=Qt.AlignRight, controlWidth=80,
            callback=self.settings_changed)
        self.learning_rate_spin = gui.doubleSpin(
            box, self, "learning_rate", 1e-5, 1.0, 1e-5,
            label="Learning rate:", decimals=5, alignment=Qt.AlignRight,
            controlWidth=80, callback=self.settings_changed)
        self.add_specific_parameters(box)

    def add_specific_parameters(self, box):
        # Combo box selecting between the entries of `losses`.
        self.algorithm_combo = gui.comboBox(
            box, self, "algorithm", label="Algorithm:",
            items=self.losses, orientation=Qt.Horizontal,
            callback=self.settings_changed)

    def create_learner(self):
        # Returns None while the base estimator is invalid (see
        # set_base_learner), so no learner is emitted in that state.
        if self.base_estimator is None:
            return None
        return self.LEARNER(base_estimator=self.base_estimator,
                            n_estimators=self.n_estimators,
                            learning_rate=self.learning_rate,
                            preprocessors=self.preprocessors,
                            algorithm=self.losses[self.algorithm])

    def set_base_learner(self, learner):
        """Handle a learner arriving on (or leaving) the input.

        Learners without weight support are rejected: the error is shown
        and the base estimator is invalidated until a valid input arrives.
        """
        self.Error.no_weight_support.clear()
        if learner and not learner.supports_weights:
            # Show the error and invalidate the base estimator so that
            # create_learner() returns None.
            self.Error.no_weight_support()
            self.base_estimator = None
            self.base_label.setText("Base estimator: INVALID")
        else:
            # Fall back to the default estimator when the input is empty.
            self.base_estimator = learner or self.DEFAULT_BASE_ESTIMATOR
            self.base_label.setText(
                "Base estimator: " + self.base_estimator.name)
        if self.auto_apply:
            self.apply()

    def get_learner_parameters(self):
        # (name, value) pairs shown in the widget's report.
        return (("Base estimator", self.base_estimator),
                ("Number of estimators", self.n_estimators),
                ("Algorithm", self.losses[self.algorithm].capitalize()))
def build_decision_tree(max_leaf=None, training_data=None):
    """Fit a decision tree and return the learner together with the model.

    Args:
        max_leaf: optional cap on the number of leaf nodes; ``None`` leaves
            the tree unrestricted.
        training_data: table to fit on. Defaults to the module-level
            ``train_dataset`` for backward compatibility; passing it
            explicitly removes the hidden dependency on that global.

    Returns:
        Tuple ``(tree_learner, decision_tree)`` of the configured learner
        and the fitted model.
    """
    if training_data is None:
        # Preserve the original behavior of reading the module-level global.
        training_data = train_dataset
    tree_learner = SklTreeLearner(max_leaf_nodes=max_leaf)
    decision_tree = tree_learner(training_data)
    print(decision_tree)
    return tree_learner, decision_tree
""" #rm_elem takes input list,l and index, i and returns a tuple (a,b), a is the #element at l[i] and b is the list without a. def rm_elem(l, i): (a,b) = ('', []) for x in range(len(l)): if x==i: a = l[x] else: b.append(l[x]) return (a,b) data_tab = Table.from_file(csv_path_fixed) class_label_var, feature_vars = rm_elem(data_tab.domain, inx_class_label) assg1_domain = Domain(feature_vars, class_label_var) data_tab = Table.from_table(domain=assg1_domain, source=data_tab) tree_learner = SklTreeLearner() eval_results = CrossValidation(data_tab, [tree_learner], k=10) print("Accuracy of cross validation: {:.3f}".format(scoring.CA(eval_results)[0])) print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0])) """ Setting max_leaf_nodes to 4 instead of allowing it to be unlimited as in previous part. """ tree_learner_max = SklTreeLearner(max_leaf_nodes = 4) eval_results_max = CrossValidation(data_tab, [tree_learner_max], k=10) print("\n\nResult of a decision tree with the limitation to only use 4 leaf nodes\n ") print("Accuracy of cross validation: {:.3f}".format(scoring.CA(eval_results_max)[0])) print("AUC: {:.3f}".format(scoring.AUC(eval_results_max)[0]))