def test_split(self):
    """Check that ``split_data`` partitions ``self.data`` on feature 0 at 25.

    Every point in the first returned group must have ``values[0]`` below
    the threshold, and every point in the second must have ``values[0]`` at
    or above it.  The groups are expected to hold 3 and 7 points.
    """
    feature_index = 0
    threshold = 25
    below, at_or_above = split_data(self.data, feature_index, threshold)

    # First group: strictly less than the threshold.
    for sample in below:
        self.assertLess(sample.values[feature_index], threshold)
    self.assertEqual(len(below), 3)

    # Second group: greater than or equal to the threshold.
    for sample in at_or_above:
        self.assertGreaterEqual(sample.values[feature_index], threshold)
    self.assertEqual(len(at_or_above), 7)
def test_best_split(self):
    """Check ``find_best_split`` on ``self.data`` and on both resulting halves.

    The full data set is expected to split on feature 1 at 38000.  After
    partitioning with ``split_data`` on that result, the first half is
    expected to yield no split at all (``(None, None)``) and the second half
    is expected to split on feature 0 at 43.
    """
    root_feature, root_thresh = find_best_split(self.data)
    self.assertEqual(root_feature, 1)
    self.assertEqual(root_thresh, 38000)

    left, right = split_data(self.data, root_feature, root_thresh)

    # The first half should report "no split" as a (None, None) pair;
    # assertIsNone checks identity and reports a clearer failure message
    # than assertEqual(..., None).
    left_feature, left_thresh = find_best_split(left)
    self.assertIsNone(left_feature)
    self.assertIsNone(left_thresh)

    right_feature, right_thresh = find_best_split(right)
    self.assertEqual(right_feature, 0)
    self.assertEqual(right_thresh, 43)
def test_split_data():
    """Print the label entropy and one split of a small fixed data set.

    Builds a 10-row array whose first two columns are used as features and
    whose third column holds a 0/1 label, then prints ``rF.entropy`` of the
    labels followed by the four values returned by ``rF.split_data``.
    Nothing is asserted; the output is meant to be inspected by eye.
    """
    rows = np.asarray([
        [2.771244718, 1.784783929, 0],
        [1.728571309, 1.169761413, 0],
        [3.678319846, 2.81281357, 0],
        [3.961043357, 2.61995032, 0],
        [2.999208922, 2.209014212, 0],
        [7.497545867, 3.162953546, 1],
        [9.00220326, 3.339047188, 1],
        [7.444542326, 0.476683375, 1],
        [10.12493903, 3.234550982, 1],
        [6.642287351, 3.319983761, 1],
    ])

    # Independent copies, so neither array shares memory with ``rows``.
    train_data = rows[:, :2].copy()    # columns 0 and 1
    train_labels = rows[:, 2:].copy()  # column 2, kept two-dimensional: (10, 1)

    # Entropy is computed on the labels flattened to a plain Python list.
    flat_labels = train_labels[:, 0].tolist()
    print(rF.entropy(flat_labels))

    # NOTE(review): the trailing arguments 0 and 2 are presumably a feature
    # index and a threshold, and the four results presumably right/left data
    # and labels -- confirm against rF.split_data.
    right_data, right_labels, left_data, left_labels = rF.split_data(
        train_data, train_labels, 0, 2
    )
    print(right_data, right_labels, left_data, left_labels)