def test__predict_by_tree():
    """
    Check ``_predict_by_tree`` on a single tree grown from seeded data.

    The test verifies that invalid arguments raise ``AssertionError`` and that
    the mean and standard deviation predicted for one query point match the
    recorded reference values.
    """

    np.random.seed(42)

    X = np.reshape(np.arange(0, 40), (10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = True

    node = package_target._split(X, Y, num_features, split_random_location)
    package_target.split(
        node, depth_max, size_min_leaf, num_features, split_random_location, 1
    )

    bx = np.array([4.0, 2.0, 3.0, 1.0])

    # A string instead of a tree, a 2-D query, and a string query must all be
    # rejected.
    with pytest.raises(AssertionError):
        package_target._predict_by_tree(bx, 'abc')
    with pytest.raises(AssertionError):
        package_target._predict_by_tree(X, node)
    with pytest.raises(AssertionError):
        package_target._predict_by_tree('abc', node)

    mean, std = package_target._predict_by_tree(bx, node)
    print(mean)
    print(std)

    # Compare within a tolerance: exact float equality depends on the
    # summation order of the underlying NumPy build.
    assert np.abs(mean - 0.179224925920024) < TEST_EPSILON
    assert np.abs(std - 0.31748922709120864) < TEST_EPSILON
def get_random_forest(
    X: np.ndarray,
    Y: np.ndarray,
    num_trees: int,
    depth_max: int,
    size_min_leaf: int,
    num_features: int,
) -> list:
    """
    Build a random forest as a list of tree roots.

    Each tree is grown from a subsample obtained by calling
    `trees_common.subsample` with a ratio of 1.0 and replacement enabled, and
    the random split location flag is disabled.

    :param X: inputs. Shape: (N, d).
    :type X: np.ndarray
    :param Y: outputs. Shape: (N, 1).
    :type Y: np.ndarray
    :param num_trees: the number of trees.
    :type num_trees: int.
    :param depth_max: maximum depth of tree.
    :type depth_max: int.
    :param size_min_leaf: minimum size of leaf.
    :type size_min_leaf: int.
    :param num_features: the number of split features.
    :type num_features: int.

    :returns: list of trees
    :rtype: list

    :raises: AssertionError

    """

    # Type checks come first so that the shape checks below can rely on
    # `.ndim` and `.shape` being available.
    for array in (X, Y):
        assert isinstance(array, np.ndarray)
    for count in (num_trees, depth_max, size_min_leaf, num_features):
        assert isinstance(count, int)

    assert X.ndim == 2
    assert Y.ndim == 2
    assert Y.shape[1] == 1
    assert X.shape[0] == Y.shape[0]

    # Fixed settings used for every tree of the forest.
    ratio_sample = 1.0
    replace_samples = True
    split_random_location = False

    def _grow_tree():
        # Draw a subsample, create the root split, then grow the tree in place.
        X_sub, Y_sub = trees_common.subsample(X, Y, ratio_sample, replace_samples)
        root = trees_common._split(X_sub, Y_sub, num_features, split_random_location)
        trees_common.split(
            root, depth_max, size_min_leaf, num_features, split_random_location, 1
        )
        return root

    return [_grow_tree() for _ in range(num_trees)]
def test__predict_by_trees():
    """
    Check ``_predict_by_trees`` on three trees grown from seeded data.

    The test verifies that invalid arguments raise ``AssertionError`` and that
    the mean and standard deviation predicted for one query point match the
    recorded reference values.
    """

    np.random.seed(42)

    X = np.reshape(np.arange(0, 40), (10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = True

    # Grow the trees one after another, so the calls happen in the same order
    # as building them one by one.
    list_trees = []
    for _ in range(3):
        node = package_target._split(X, Y, num_features, split_random_location)
        package_target.split(
            node, depth_max, size_min_leaf, num_features, split_random_location, 1
        )
        list_trees.append(node)

    bx = np.array([4.0, 2.0, 3.0, 1.0])

    # A single node instead of a list, a string instead of a list, a 2-D
    # query, and a string query must all be rejected.
    with pytest.raises(AssertionError):
        package_target._predict_by_trees(bx, list_trees[0])
    with pytest.raises(AssertionError):
        package_target._predict_by_trees(bx, 'abc')
    with pytest.raises(AssertionError):
        package_target._predict_by_trees(X, list_trees)
    with pytest.raises(AssertionError):
        package_target._predict_by_trees('abc', list_trees)

    mean, std = package_target._predict_by_trees(bx, list_trees)
    print(mean)
    print(std)

    # Compare within a tolerance: exact float equality depends on the
    # summation order of the underlying NumPy build.
    assert np.abs(mean - 0.12544669602080652) < TEST_EPSILON
    assert np.abs(std - 0.3333040901154691) < TEST_EPSILON
def get_generic_trees(
    X: np.ndarray,
    Y: np.ndarray,
    num_trees: int,
    depth_max: int,
    size_min_leaf: int,
    ratio_sampling: float,
    replace_samples: bool,
    num_features: int,
    split_random_location: bool,
) -> list:
    """
    Build a list of generic trees, one tree per subsample of the dataset.

    :param X: inputs. Shape: (N, d).
    :type X: np.ndarray
    :param Y: outputs. Shape: (N, 1).
    :type Y: np.ndarray
    :param num_trees: the number of trees.
    :type num_trees: int.
    :param depth_max: maximum depth of tree.
    :type depth_max: int.
    :param size_min_leaf: minimum size of leaf.
    :type size_min_leaf: int.
    :param ratio_sampling: ratio of dataset subsampling. It must be positive,
        and at most 1.0 when `replace_samples` is False.
    :type ratio_sampling: float
    :param replace_samples: flag for replacement.
    :type replace_samples: bool.
    :param num_features: the number of split features.
    :type num_features: int.
    :param split_random_location: flag for random split location.
    :type split_random_location: bool.

    :returns: list of trees
    :rtype: list

    :raises: AssertionError

    """

    # Type checks come first so that the shape checks below can rely on
    # `.ndim` and `.shape` being available.
    for array in (X, Y):
        assert isinstance(array, np.ndarray)
    for count in (num_trees, depth_max, size_min_leaf, num_features):
        assert isinstance(count, int)
    assert isinstance(ratio_sampling, float)
    for flag in (replace_samples, split_random_location):
        assert isinstance(flag, bool)

    assert X.ndim == 2
    assert Y.ndim == 2
    assert Y.shape[1] == 1
    assert X.shape[0] == Y.shape[0]

    # The ratio is always positive; the upper bound of 1.0 applies only when
    # sampling without replacement.
    assert ratio_sampling > 0.0
    if not replace_samples:
        assert ratio_sampling <= 1.0

    def _grow_tree():
        # Draw a subsample, create the root split, then grow the tree in place.
        X_sub, Y_sub = trees_common.subsample(X, Y, ratio_sampling, replace_samples)
        root = trees_common._split(X_sub, Y_sub, num_features, split_random_location)
        trees_common.split(
            root, depth_max, size_min_leaf, num_features, split_random_location, 1
        )
        return root

    return [_grow_tree() for _ in range(num_trees)]
def test_predict_by_trees():
    """
    Check ``predict_by_trees`` on three trees grown from seeded data.

    The test verifies that invalid arguments raise ``AssertionError``, that
    predictions for the training inputs match recorded reference values within
    ``TEST_EPSILON``, and that predictions for 1000 random inputs have the
    expected types and shapes.
    """

    np.random.seed(42)

    X = np.arange(0, 40).reshape((10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = True

    # Grow the trees one after another so the order of random draws is fixed.
    list_trees = []
    for _ in range(3):
        root = package_target._split(X, Y, num_features, split_random_location)
        package_target.split(
            root, depth_max, size_min_leaf, num_features, split_random_location, 1
        )
        list_trees.append(root)

    def _check_outputs(inputs, means, stds):
        # Both outputs are expected to be column arrays with one row per input.
        assert isinstance(means, np.ndarray)
        assert isinstance(stds, np.ndarray)
        assert len(means.shape) == 2
        assert len(stds.shape) == 2
        assert means.shape[0] == stds.shape[0] == inputs.shape[0]
        assert means.shape[1] == stds.shape[1] == 1

    bx = np.array([4.0, 2.0, 3.0, 1.0])
    invalid_calls = [
        (bx, list_trees[0]),
        (bx, 'abc'),
        (bx, list_trees),
        ('abc', list_trees),
    ]
    for inputs, trees in invalid_calls:
        with pytest.raises(AssertionError):
            package_target.predict_by_trees(inputs, trees)

    means, stds = package_target.predict_by_trees(X, list_trees)
    print(means)
    print(stds)

    means_truth = np.array([
        [0.33710618],
        [0.1254467],
        [0.68947573],
        [0.9866563],
        [0.16257799],
        [0.16258346],
        [0.85148454],
        [0.55084716],
        [0.30721653],
        [0.30721653],
    ])
    stds_truth = np.array([
        [0.29842457],
        [0.33330409],
        [0.44398864],
        [0.72536523],
        [0.74232577],
        [0.74232284],
        [0.83388663],
        [0.5615399],
        [0.64331582],
        [0.64331582],
    ])

    _check_outputs(X, means, stds)
    assert np.all(np.abs(means - means_truth) < TEST_EPSILON)
    assert np.all(np.abs(stds - stds_truth) < TEST_EPSILON)

    # Only types and shapes are checked for the random inputs.
    X = np.random.randn(1000, 4)
    means, stds = package_target.predict_by_trees(X, list_trees)
    _check_outputs(X, means, stds)
def test_split():
    """
    Check argument validation of ``split`` and that a valid call succeeds.

    Each invalid call replaces one argument with a value of the wrong type and
    is expected to raise ``AssertionError``. After a valid call the node is
    still a ``dict``.
    """

    np.random.seed(42)

    X = np.arange(0, 40).reshape((10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = False

    node = package_target._split(X, Y, num_features, split_random_location)

    # Argument order: node, depth_max, size_min_leaf, num_features,
    # split_random_location, and the final positional argument.
    invalid_arguments = [
        (node, depth_max, size_min_leaf, num_features, split_random_location, 'abc'),
        (node, depth_max, size_min_leaf, num_features, split_random_location, 1.0),
        (node, depth_max, size_min_leaf, num_features, 'abc', 1),
        (node, depth_max, size_min_leaf, 'abc', split_random_location, 1),
        (node, depth_max, 'abc', num_features, split_random_location, 1),
        (node, 'abc', size_min_leaf, num_features, split_random_location, 1),
        (X, depth_max, size_min_leaf, num_features, split_random_location, 1),
        ('abc', depth_max, size_min_leaf, num_features, split_random_location, 1),
    ]
    for arguments in invalid_arguments:
        with pytest.raises(AssertionError):
            package_target.split(*arguments)

    package_target.split(
        node, depth_max, size_min_leaf, num_features, split_random_location, 1
    )
    assert isinstance(node, dict)