def test_arboreal_set_params(self):
    """Check that ``set_params`` stores every non-default value on the tree.

    The number of public attributes on the instance (names that neither
    start nor end with an underscore) must equal the number of entries in
    ``self.nondefault_vals``, and each entry must be readable back as a
    like-named attribute.
    """
    tree = ArborealTree()
    tree.set_params(**self.nondefault_vals)

    # Collect only the user-facing attributes; names with a leading or
    # trailing underscore are excluded from the count.
    public_names = []
    for name in vars(tree):
        if name.startswith("_") or name.endswith("_"):
            continue
        public_names.append(name)
    self.assertEqual(len(public_names), len(self.nondefault_vals))

    # Every value handed to set_params must come back unchanged.
    for param, expected in self.nondefault_vals.items():
        self.assertEqual(getattr(tree, param), expected)
def test_arboreal_constructor(self):
    """Check that a no-argument constructor exposes the default values.

    The number of public attributes on a freshly built tree (names that
    neither start nor end with an underscore) must equal the number of
    entries in ``self.default_vals``, and each entry must match the
    like-named attribute.
    """
    tree = ArborealTree()

    # Keep user-facing attribute names only.
    public_names = {
        name
        for name in vars(tree)
        if not name.startswith("_") and not name.endswith("_")
    }
    self.assertEqual(len(public_names), len(self.default_vals))

    # Compare each expected default against what the instance holds.
    for param, expected in self.default_vals.items():
        self.assertEqual(getattr(tree, param), expected)
def test_grid_search_interface_for_arboreal_tree(self):
    """Run ArborealTree through GridSearchCV and check held-out accuracy.

    A grid over four hyperparameters is searched with 3-fold
    cross-validation on the training split; the fitted search is then
    scored on the test split and must agree with 0.90 to two decimal
    places.
    """
    expected_accuracy = 0.90
    search_space = {
        "bootstrap_criterion_fraction_threshold": [0.8, 0.999_999_999],
        "maximum_bootstrap_branching_factor": [1, 2, 3],
        "feature_subset_fraction": [None, 0.4],
        "min_samples_split": [2, 3],
    }
    estimator = ArborealTree(random_seed=self.random_seed)

    # verbose = 2 for helpful info during the search
    # NOTE(review): if GridSearchCV is scikit-learn's, the `iid` argument
    # was removed in scikit-learn 0.24 -- confirm the pinned version.
    search = GridSearchCV(
        estimator,
        search_space,
        cv=3,
        scoring="accuracy",
        iid=True,
        verbose=0,
        n_jobs=self.n_jobs,
    )
    search.fit(self.X_train, self.y_train)

    observed_accuracy = search.score(self.X_test, self.y_test)
    self.assertAlmostEqual(observed_accuracy, expected_accuracy, places=2)
def test_arboreal_set_then_get_params(self):
    """Check that ``get_params`` returns exactly what ``set_params`` stored."""
    tree = ArborealTree()
    tree.set_params(**self.nondefault_vals)

    # Round trip: the dict read back must equal the dict written.
    self.assertEqual(tree.get_params(), self.nondefault_vals)
def test_arboreal_get_params(self):
    """Check that a default-constructed tree reports ``self.default_vals``."""
    reported = ArborealTree().get_params()
    expected = self.default_vals
    self.assertEqual(reported, expected)
def test_arboreal_constructor_and_set_params_are_equal(self):
    """Check that constructor kwargs and ``set_params`` are interchangeable.

    For both the default and the non-default parameter sets, a tree built
    with the values as constructor keywords must report the same
    ``get_params()`` as a default-built tree updated through
    ``set_params``.
    """

    def check_same_params(params):
        # One tree configured via the constructor, one via set_params.
        via_constructor = ArborealTree(**params)
        via_setter = ArborealTree()
        via_setter.set_params(**params)
        self.assertEqual(
            via_constructor.get_params(), via_setter.get_params()
        )

    check_same_params(self.default_vals)
    check_same_params(self.nondefault_vals)
"sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)", ] m.categoricals = ["target"] m.target = "target" # Create an Arboreal Dataset for train and test train_dataset = Dataset(metadata=m, datapoints=train_datapoints) test_dataset = Dataset( metadata=m, datapoints=test_datapoints, validate_target=False ) # Fit an ArborealTree on the train set tree = ArborealTree() # or DecisionTree() or RandomForest() tree.fit(train_dataset) # Predict data points in the test set predictions = tree.transform(test_dataset) # Evaluate our performance on the test set results = [] prediction_datatypes = set() for dp in test_datapoints_for_eval: target = dp["target"] prediction = predictions[dp["identifier"]] predicted_value = prediction[0] prediction_datatype = prediction[1] prediction_datatypes.add(prediction_datatype) assert (