コード例 #1
0
def test__predict_by_tree():
    """
    Check `_predict_by_tree` on a single tree.

    Invalid arguments must be rejected with AssertionError, and a valid
    query point must yield the expected mean and standard deviation.
    """

    # Seed NumPy's global generator so that Y (and any randomness used while
    # the tree is built) is reproducible; the expected values below were
    # recorded under this seed.
    np.random.seed(42)

    X = np.reshape(np.arange(0, 40), (10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = True

    node = package_target._split(X, Y, num_features, split_random_location)
    package_target.split(node, depth_max, size_min_leaf, num_features,
                         split_random_location, 1)

    query = np.array([4.0, 2.0, 3.0, 1.0])

    # A string in place of the tree.
    with pytest.raises(AssertionError):
        package_target._predict_by_tree(query, 'abc')
    # A 2-D array in place of a single 1-D query point.
    with pytest.raises(AssertionError):
        package_target._predict_by_tree(X, node)
    # A string in place of the query point.
    with pytest.raises(AssertionError):
        package_target._predict_by_tree('abc', node)

    mean, std = package_target._predict_by_tree(query, node)

    # Compare with a tolerance rather than `==`: exact floating-point
    # equality can fail on last-bit differences between platforms or
    # NumPy versions even when the implementation is correct.
    assert np.isclose(mean, 0.179224925920024)
    assert np.isclose(std, 0.31748922709120864)
コード例 #2
0
def get_random_forest(
    X: np.ndarray,
    Y: np.ndarray,
    num_trees: int,
    depth_max: int,
    size_min_leaf: int,
    num_features: int,
) -> list:
    """
    Build a random forest and return its trees.

    Every tree is grown from its own call to `trees_common.subsample`
    (sampling ratio 1.0, replacement enabled) and is split without random
    split locations.

    :param X: inputs. Shape: (N, d).
    :type X: np.ndarray
    :param Y: outputs. Shape: (N, 1).
    :type Y: np.ndarray
    :param num_trees: the number of trees.
    :type num_trees: int.
    :param depth_max: maximum depth of tree.
    :type depth_max: int.
    :param size_min_leaf: minimum size of leaf.
    :type size_min_leaf: int.
    :param num_features: the number of split features.
    :type num_features: int.

    :returns: list of trees
    :rtype: list

    :raises: AssertionError

    """

    # Type checks come first so that the shape checks below can safely
    # access array attributes.
    expected_types = (
        (X, np.ndarray),
        (Y, np.ndarray),
        (num_trees, int),
        (depth_max, int),
        (size_min_leaf, int),
        (num_features, int),
    )
    for value, expected_type in expected_types:
        assert isinstance(value, expected_type)

    assert X.ndim == 2
    assert Y.ndim == 2
    # Y must hold exactly one output per row of X.
    assert Y.shape == (X.shape[0], 1)

    forest = []

    for _ in range(num_trees):
        X_sub, Y_sub = trees_common.subsample(X, Y, 1.0, True)

        tree = trees_common._split(X_sub, Y_sub, num_features, False)
        # `split` grows the tree in place, starting from depth 1.
        trees_common.split(tree, depth_max, size_min_leaf, num_features,
                           False, 1)

        forest.append(tree)

    return forest
コード例 #3
0
def test__predict_by_trees():
    """
    Check `_predict_by_trees` on a list of three trees.

    Invalid arguments must be rejected with AssertionError, and a valid
    query point must yield the expected mean and standard deviation.
    """

    # Seed NumPy's global generator so that Y (and any randomness used while
    # the trees are built) is reproducible; the expected values below were
    # recorded under this seed.
    np.random.seed(42)

    X = np.reshape(np.arange(0, 40), (10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = True

    # Build the trees one after another (root split, then in-place growth),
    # so any random draws happen in the same order for every run.
    list_trees = []
    for _ in range(3):
        node = package_target._split(X, Y, num_features,
                                     split_random_location)
        package_target.split(node, depth_max, size_min_leaf, num_features,
                             split_random_location, 1)
        list_trees.append(node)

    query = np.array([4.0, 2.0, 3.0, 1.0])

    # A single tree in place of a list of trees.
    with pytest.raises(AssertionError):
        package_target._predict_by_trees(query, list_trees[0])
    # A string in place of the list of trees.
    with pytest.raises(AssertionError):
        package_target._predict_by_trees(query, 'abc')
    # A 2-D array in place of a single 1-D query point.
    with pytest.raises(AssertionError):
        package_target._predict_by_trees(X, list_trees)
    # A string in place of the query point.
    with pytest.raises(AssertionError):
        package_target._predict_by_trees('abc', list_trees)

    mean, std = package_target._predict_by_trees(query, list_trees)

    # Compare with a tolerance rather than `==`: exact floating-point
    # equality can fail on last-bit differences between platforms or
    # NumPy versions even when the implementation is correct.
    assert np.isclose(mean, 0.12544669602080652)
    assert np.isclose(std, 0.3333040901154691)
コード例 #4
0
def get_generic_trees(
    X: np.ndarray,
    Y: np.ndarray,
    num_trees: int,
    depth_max: int,
    size_min_leaf: int,
    ratio_sampling: float,
    replace_samples: bool,
    num_features: int,
    split_random_location: bool,
) -> list:
    """
    Build a list of generic trees and return it.

    Every tree is grown from its own call to `trees_common.subsample`,
    using the given sampling ratio and replacement flag.

    :param X: inputs. Shape: (N, d).
    :type X: np.ndarray
    :param Y: outputs. Shape: (N, 1).
    :type Y: np.ndarray
    :param num_trees: the number of trees.
    :type num_trees: int.
    :param depth_max: maximum depth of tree.
    :type depth_max: int.
    :param size_min_leaf: minimum size of leaf.
    :type size_min_leaf: int.
    :param ratio_sampling: ratio of dataset subsampling. It must be
        positive, and at most 1.0 when sampling without replacement.
    :type ratio_sampling: float
    :param replace_samples: flag for replacement.
    :type replace_samples: bool.
    :param num_features: the number of split features.
    :type num_features: int.
    :param split_random_location: flag for random split location.
    :type split_random_location: bool.

    :returns: list of trees
    :rtype: list

    :raises: AssertionError

    """

    # Type checks come first so that the shape and range checks below can
    # safely use array attributes and numeric comparisons.
    expected_types = (
        (X, np.ndarray),
        (Y, np.ndarray),
        (num_trees, int),
        (depth_max, int),
        (size_min_leaf, int),
        (ratio_sampling, float),
        (replace_samples, bool),
        (num_features, int),
        (split_random_location, bool),
    )
    for value, expected_type in expected_types:
        assert isinstance(value, expected_type)

    assert X.ndim == 2
    assert Y.ndim == 2
    # Y must hold exactly one output per row of X.
    assert Y.shape == (X.shape[0], 1)

    # The ratio is always positive; the upper bound of 1.0 applies only
    # when sampling without replacement.
    assert ratio_sampling > 0.0
    if not replace_samples:
        assert ratio_sampling <= 1.0

    trees = []

    for _ in range(num_trees):
        X_sub, Y_sub = trees_common.subsample(X, Y, ratio_sampling,
                                              replace_samples)

        tree = trees_common._split(X_sub, Y_sub, num_features,
                                   split_random_location)
        # `split` grows the tree in place, starting from depth 1.
        trees_common.split(tree, depth_max, size_min_leaf, num_features,
                           split_random_location, 1)

        trees.append(tree)

    return trees
コード例 #5
0
def test_predict_by_trees():
    """
    Check `predict_by_trees` on a batch of query points.

    Invalid arguments must be rejected with AssertionError; predictions on
    the training inputs must match recorded values within TEST_EPSILON; and
    predictions on a larger random batch must have the expected shapes.
    """

    def check_outputs(inputs, means, stds):
        # Both outputs are expected to be (number of inputs, 1) arrays.
        assert isinstance(means, np.ndarray)
        assert isinstance(stds, np.ndarray)
        assert len(means.shape) == 2
        assert len(stds.shape) == 2
        assert means.shape[0] == stds.shape[0] == inputs.shape[0]
        assert means.shape[1] == stds.shape[1] == 1

    # Seed NumPy's global generator so that the recorded values below are
    # reproducible.
    np.random.seed(42)

    X = np.arange(0, 40).reshape((10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = True

    # Build the three trees one after another (root split, then in-place
    # growth), keeping the order of any random draws unchanged.
    list_trees = []
    for _ in range(3):
        root = package_target._split(X, Y, num_features,
                                     split_random_location)
        package_target.split(root, depth_max, size_min_leaf, num_features,
                             split_random_location, 1)
        list_trees.append(root)

    single_point = [4.0, 2.0, 3.0, 1.0]

    # A single tree in place of a list of trees.
    with pytest.raises(AssertionError):
        package_target.predict_by_trees(np.array(single_point), list_trees[0])
    # A string in place of the list of trees.
    with pytest.raises(AssertionError):
        package_target.predict_by_trees(np.array(single_point), 'abc')
    # A 1-D point in place of a 2-D batch of points.
    with pytest.raises(AssertionError):
        package_target.predict_by_trees(np.array(single_point), list_trees)
    # A string in place of the batch of points.
    with pytest.raises(AssertionError):
        package_target.predict_by_trees('abc', list_trees)

    means, stds = package_target.predict_by_trees(X, list_trees)
    print(means)
    print(stds)

    means_truth = np.array([
        0.33710618,
        0.1254467,
        0.68947573,
        0.9866563,
        0.16257799,
        0.16258346,
        0.85148454,
        0.55084716,
        0.30721653,
        0.30721653,
    ])[:, np.newaxis]

    stds_truth = np.array([
        0.29842457,
        0.33330409,
        0.44398864,
        0.72536523,
        0.74232577,
        0.74232284,
        0.83388663,
        0.5615399,
        0.64331582,
        0.64331582,
    ])[:, np.newaxis]

    check_outputs(X, means, stds)

    assert np.all(np.abs(means - means_truth) < TEST_EPSILON)
    assert np.all(np.abs(stds - stds_truth) < TEST_EPSILON)

    # Larger random batch: only the output types and shapes are checked.
    X = np.random.randn(1000, 4)

    means, stds = package_target.predict_by_trees(X, list_trees)

    check_outputs(X, means, stds)
コード例 #6
0
def test_split():
    """
    Check `split`: each malformed argument must trigger AssertionError, and
    a well-formed call must leave the root node a dict.
    """

    np.random.seed(42)

    X = np.arange(0, 40).reshape((10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = False

    node = package_target._split(X, Y, num_features, split_random_location)

    # Each entry replaces exactly one argument of an otherwise valid call
    # with a value of the wrong type, working from the last argument back
    # to the first.
    invalid_calls = [
        (node, depth_max, size_min_leaf, num_features,
         split_random_location, 'abc'),
        (node, depth_max, size_min_leaf, num_features,
         split_random_location, 1.0),
        (node, depth_max, size_min_leaf, num_features, 'abc', 1),
        (node, depth_max, size_min_leaf, 'abc', split_random_location, 1),
        (node, depth_max, 'abc', num_features, split_random_location, 1),
        (node, 'abc', size_min_leaf, num_features, split_random_location, 1),
        (X, depth_max, size_min_leaf, num_features,
         split_random_location, 1),
        ('abc', depth_max, size_min_leaf, num_features,
         split_random_location, 1),
    ]

    for arguments in invalid_calls:
        with pytest.raises(AssertionError):
            package_target.split(*arguments)

    # A well-formed call must complete and leave the root node a dict.
    package_target.split(node, depth_max, size_min_leaf, num_features,
                         split_random_location, 1)
    assert isinstance(node, dict)